1 2 /* 3 Defines the basic matrix operations for the BAIJ (compressed row) 4 matrix storage format. 5 */ 6 #include <../src/mat/impls/baij/seq/baij.h> /*I "petscmat.h" I*/ 7 #include <petscblaslapack.h> 8 #include <petsc/private/kernels/blockinvert.h> 9 #include <petsc/private/kernels/blockmatmult.h> 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ" 13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values) 14 { 15 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data; 16 PetscErrorCode ierr; 17 PetscInt *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots; 18 MatScalar *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work; 19 PetscReal shift = 0.0; 20 PetscBool allowzeropivot,zeropivotdetected=PETSC_FALSE; 21 22 PetscFunctionBegin; 23 allowzeropivot = PetscNot(A->erroriffailure); 24 25 if (a->idiagvalid) { 26 if (values) *values = a->idiag; 27 PetscFunctionReturn(0); 28 } 29 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 30 diag_offset = a->diag; 31 if (!a->idiag) { 32 ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr); 33 ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr); 34 } 35 diag = a->idiag; 36 mdiag = a->idiag+bs2*mbs; 37 if (values) *values = a->idiag; 38 /* factor and invert each block */ 39 switch (bs) { 40 case 1: 41 for (i=0; i<mbs; i++) { 42 odiag = v + 1*diag_offset[i]; 43 diag[0] = odiag[0]; 44 mdiag[0] = odiag[0]; 45 46 if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) { 47 if (allowzeropivot) { 48 A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 49 ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr); 50 } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D",i); 51 } 52 53 diag[0] = (PetscScalar)1.0 / (diag[0] + shift); 54 diag += 1; 55 mdiag += 1; 56 } 57 break; 58 case 2: 59 for (i=0; i<mbs; i++) { 60 odiag = v + 4*diag_offset[i]; 61 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 62 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 63 ierr = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 64 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 65 diag += 4; 66 mdiag += 4; 67 } 68 break; 69 case 3: 70 for (i=0; i<mbs; i++) { 71 odiag = v + 9*diag_offset[i]; 72 diag[0] = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3]; 73 diag[4] = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7]; 74 diag[8] = odiag[8]; 75 mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3]; 76 mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7]; 77 mdiag[8] = odiag[8]; 78 ierr = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 79 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 80 diag += 9; 81 mdiag += 9; 82 } 83 break; 84 case 4: 85 for (i=0; i<mbs; i++) { 86 odiag = v + 16*diag_offset[i]; 87 ierr = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 88 ierr = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr); 89 ierr = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 90 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 91 diag += 16; 92 mdiag += 16; 93 } 94 break; 95 case 5: 96 for (i=0; i<mbs; i++) { 97 odiag = v + 25*diag_offset[i]; 98 ierr = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 99 ierr = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr); 100 ierr = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 101 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 102 diag += 25; 103 mdiag += 25; 104 } 105 break; 106 case 6: 107 for (i=0; i<mbs; i++) { 108 odiag = v + 36*diag_offset[i]; 109 ierr = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 110 ierr = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr); 111 ierr = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 112 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 113 diag += 36; 114 mdiag += 36; 115 } 116 break; 117 case 7: 118 for (i=0; i<mbs; i++) { 119 odiag = v + 49*diag_offset[i]; 120 ierr = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 121 ierr = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr); 122 ierr = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 123 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 124 diag += 49; 125 mdiag += 49; 126 } 127 break; 128 default: 129 ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr); 130 for (i=0; i<mbs; i++) { 131 odiag = v + bs2*diag_offset[i]; 132 ierr = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 133 ierr = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 134 ierr = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr); 135 if (zeropivotdetected) A->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 136 diag += bs2; 137 mdiag += bs2; 138 } 139 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 140 } 141 a->idiagvalid = PETSC_TRUE; 142 PetscFunctionReturn(0); 143 } 144 145 #undef __FUNCT__ 146 #define __FUNCT__ "MatSOR_SeqBAIJ" 147 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 148 { 149 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 150 PetscScalar *x,*work,*w,*workt,*t; 151 const MatScalar *v,*aa = a->a, *idiag; 152 const PetscScalar *b,*xb; 153 PetscScalar s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */ 154 PetscErrorCode ierr; 155 PetscInt m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it; 156 const PetscInt *diag,*ai = a->i,*aj = a->j,*vi; 157 158 PetscFunctionBegin; 159 if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */ 160 its = its*lits; 161 if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 162 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 163 if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift"); 164 if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor"); 165 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts"); 166 167 if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);} 168 169 if (!m) PetscFunctionReturn(0); 170 diag = a->diag; 171 idiag = a->idiag; 172 k = PetscMax(A->rmap->n,A->cmap->n); 173 if (!a->mult_work) { 174 ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr); 175 } 176 if (!a->sor_workt) { 177 ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr); 178 } 179 if (!a->sor_work) { 180 ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr); 181 } 182 work = a->mult_work; 183 t = a->sor_workt; 184 w = a->sor_work; 185 186 ierr = VecGetArray(xx,&x);CHKERRQ(ierr); 187 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 188 189 if (flag & SOR_ZERO_INITIAL_GUESS) { 190 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 191 switch (bs) { 192 case 1: 193 PetscKernel_v_gets_A_times_w_1(x,idiag,b); 194 t[0] = b[0]; 195 i2 = 1; 196 idiag += 1; 197 for (i=1; i<m; i++) { 198 v = aa + ai[i]; 199 vi = aj + ai[i]; 200 nz = diag[i] - ai[i]; 201 s[0] = b[i2]; 202 for (j=0; j<nz; j++) { 203 xw[0] = x[vi[j]]; 204 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 205 } 206 t[i2] = s[0]; 207 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 208 x[i2] = xw[0]; 209 idiag += 1; 210 i2 += 1; 211 } 212 break; 213 case 2: 214 PetscKernel_v_gets_A_times_w_2(x,idiag,b); 215 t[0] = b[0]; t[1] = b[1]; 216 i2 = 2; 217 idiag += 4; 218 for (i=1; i<m; i++) { 219 v = aa + 4*ai[i]; 220 vi = aj + ai[i]; 221 nz = diag[i] - ai[i]; 222 s[0] = b[i2]; s[1] = b[i2+1]; 223 for (j=0; j<nz; j++) { 224 idx = 2*vi[j]; 225 it = 4*j; 226 xw[0] = x[idx]; xw[1] = x[1+idx]; 227 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 228 } 229 t[i2] = s[0]; t[i2+1] = s[1]; 230 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 231 x[i2] = xw[0]; x[i2+1] = xw[1]; 232 idiag += 4; 233 i2 += 2; 234 } 235 break; 236 case 3: 237 PetscKernel_v_gets_A_times_w_3(x,idiag,b); 238 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 239 i2 = 3; 240 idiag += 9; 241 for (i=1; i<m; i++) { 242 v = aa + 9*ai[i]; 243 vi = aj + ai[i]; 244 nz = diag[i] - ai[i]; 245 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 246 while (nz--) { 247 idx = 3*(*vi++); 248 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 249 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 250 v += 9; 251 } 252 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 253 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 254 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 255 idiag += 9; 256 i2 += 3; 257 } 258 break; 259 case 4: 260 PetscKernel_v_gets_A_times_w_4(x,idiag,b); 261 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; 262 i2 = 4; 263 idiag += 16; 264 for (i=1; i<m; i++) { 265 v = aa + 16*ai[i]; 266 vi = aj + ai[i]; 267 nz = diag[i] - ai[i]; 268 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 269 while (nz--) { 270 idx = 4*(*vi++); 271 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 272 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 273 v += 16; 274 } 275 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3]; 276 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 277 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 278 idiag += 16; 279 i2 += 4; 280 } 281 break; 282 case 5: 283 PetscKernel_v_gets_A_times_w_5(x,idiag,b); 284 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; 285 i2 = 5; 286 idiag += 25; 287 for (i=1; i<m; i++) { 288 v = aa + 25*ai[i]; 289 vi = aj + ai[i]; 290 nz = diag[i] - ai[i]; 291 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 292 while (nz--) { 293 idx = 5*(*vi++); 294 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 295 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 296 v += 25; 297 } 298 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4]; 299 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 300 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 301 idiag += 25; 302 i2 += 5; 303 } 304 break; 305 case 6: 306 PetscKernel_v_gets_A_times_w_6(x,idiag,b); 307 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; 308 i2 = 6; 309 idiag += 36; 310 for (i=1; i<m; i++) { 311 v = aa + 36*ai[i]; 312 vi = aj + ai[i]; 313 nz = diag[i] - ai[i]; 314 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 315 while (nz--) { 316 idx = 6*(*vi++); 317 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 318 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 319 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 320 v += 36; 321 } 322 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 323 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; 324 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 325 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 326 idiag += 36; 327 i2 += 6; 328 } 329 break; 330 case 7: 331 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 332 t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; 333 t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6]; 334 i2 = 7; 335 idiag += 49; 336 for (i=1; i<m; i++) { 337 v = aa + 49*ai[i]; 338 vi = aj + ai[i]; 339 nz = diag[i] - ai[i]; 340 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 341 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 342 while (nz--) { 343 idx = 7*(*vi++); 344 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 345 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 346 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 347 v += 49; 348 } 349 t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; 350 t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6]; 351 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 352 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 353 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 354 idiag += 49; 355 i2 += 7; 356 } 357 break; 358 default: 359 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); 360 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 361 i2 = bs; 362 idiag += bs2; 363 for (i=1; i<m; i++) { 364 v = aa + bs2*ai[i]; 365 vi = aj + ai[i]; 366 nz = diag[i] - ai[i]; 367 368 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 369 /* copy all rows of x that are needed into contiguous space */ 370 workt = work; 371 for (j=0; j<nz; j++) { 372 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 373 workt += bs; 374 } 375 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 376 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 377 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 378 379 idiag += bs2; 380 i2 += bs; 381 } 382 break; 383 } 384 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 385 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 386 xb = t; 387 } 388 else xb = b; 389 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 390 idiag = a->idiag+bs2*(a->mbs-1); 391 i2 = bs * (m-1); 392 switch (bs) { 393 case 1: 394 s[0] = xb[i2]; 395 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 396 x[i2] = xw[0]; 397 i2 -= 1; 398 for (i=m-2; i>=0; i--) { 399 v = aa + (diag[i]+1); 400 vi = aj + diag[i] + 1; 401 nz = ai[i+1] - diag[i] - 1; 402 s[0] = xb[i2]; 403 for (j=0; j<nz; j++) { 404 xw[0] = x[vi[j]]; 405 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 406 } 407 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 408 x[i2] = xw[0]; 409 idiag -= 1; 410 i2 -= 1; 411 } 412 break; 413 case 2: 414 s[0] = xb[i2]; s[1] = xb[i2+1]; 415 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 416 x[i2] = xw[0]; x[i2+1] = xw[1]; 417 i2 -= 2; 418 idiag -= 4; 419 for (i=m-2; i>=0; i--) { 420 v = aa + 4*(diag[i] + 1); 421 vi = aj + diag[i] + 1; 422 nz = ai[i+1] - diag[i] - 1; 423 s[0] = xb[i2]; s[1] = xb[i2+1]; 424 for (j=0; j<nz; j++) { 425 idx = 2*vi[j]; 426 it = 4*j; 427 xw[0] = x[idx]; xw[1] = x[1+idx]; 428 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 429 } 430 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 431 x[i2] = xw[0]; x[i2+1] = xw[1]; 432 idiag -= 4; 433 i2 -= 2; 434 } 435 break; 436 case 3: 437 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 438 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 439 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 440 i2 -= 3; 441 idiag -= 9; 442 for (i=m-2; i>=0; i--) { 443 v = aa + 9*(diag[i]+1); 444 vi = aj + diag[i] + 1; 445 nz = ai[i+1] - diag[i] - 1; 446 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 447 while (nz--) { 448 idx = 3*(*vi++); 449 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 450 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 451 v += 9; 452 } 453 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 454 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 455 idiag -= 9; 456 i2 -= 3; 457 } 458 break; 459 case 4: 460 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 461 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 462 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 463 i2 -= 4; 464 idiag -= 16; 465 for (i=m-2; i>=0; i--) { 466 v = aa + 16*(diag[i]+1); 467 vi = aj + diag[i] + 1; 468 nz = ai[i+1] - diag[i] - 1; 469 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; 470 while (nz--) { 471 idx = 4*(*vi++); 472 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 473 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 474 v += 16; 475 } 476 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 477 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; 478 idiag -= 16; 479 i2 -= 4; 480 } 481 break; 482 case 5: 483 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 484 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 485 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 486 i2 -= 5; 487 idiag -= 25; 488 for (i=m-2; i>=0; i--) { 489 v = aa + 25*(diag[i]+1); 490 vi = aj + diag[i] + 1; 491 nz = ai[i+1] - diag[i] - 1; 492 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; 493 while (nz--) { 494 idx = 5*(*vi++); 495 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 496 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 497 v += 25; 498 } 499 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 500 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; 501 idiag -= 25; 502 i2 -= 5; 503 } 504 break; 505 case 6: 506 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 507 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 508 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 509 i2 -= 6; 510 idiag -= 36; 511 for (i=m-2; i>=0; i--) { 512 v = aa + 36*(diag[i]+1); 513 vi = aj + diag[i] + 1; 514 nz = ai[i+1] - diag[i] - 1; 515 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; 516 while (nz--) { 517 idx = 6*(*vi++); 518 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 519 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 520 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 521 v += 36; 522 } 523 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 524 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; 525 idiag -= 36; 526 i2 -= 6; 527 } 528 break; 529 case 7: 530 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 531 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 532 PetscKernel_v_gets_A_times_w_7(x,idiag,b); 533 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 534 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 535 i2 -= 7; 536 idiag -= 49; 537 for (i=m-2; i>=0; i--) { 538 v = aa + 49*(diag[i]+1); 539 vi = aj + diag[i] + 1; 540 nz = ai[i+1] - diag[i] - 1; 541 s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; 542 s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6]; 543 while (nz--) { 544 idx = 7*(*vi++); 545 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 546 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 547 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 548 v += 49; 549 } 550 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 551 x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; 552 x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6]; 553 idiag -= 49; 554 i2 -= 7; 555 } 556 break; 557 default: 558 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 559 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 560 i2 -= bs; 561 idiag -= bs2; 562 for (i=m-2; i>=0; i--) { 563 v = aa + bs2*(diag[i]+1); 564 vi = aj + diag[i] + 1; 565 nz = ai[i+1] - diag[i] - 1; 566 567 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 568 /* copy all rows of x that are needed into contiguous space */ 569 workt = work; 570 for (j=0; j<nz; j++) { 571 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 572 workt += bs; 573 } 574 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 575 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 576 577 idiag -= bs2; 578 i2 -= bs; 579 } 580 break; 581 } 582 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 583 } 584 its--; 585 } 586 while (its--) { 587 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 588 idiag = a->idiag; 589 i2 = 0; 590 switch (bs) { 591 case 1: 592 for (i=0; i<m; i++) { 593 v = aa + ai[i]; 594 vi = aj + ai[i]; 595 nz = ai[i+1] - ai[i]; 596 s[0] = b[i2]; 597 for (j=0; j<nz; j++) { 598 xw[0] = x[vi[j]]; 599 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 600 } 601 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 602 x[i2] += xw[0]; 603 idiag += 1; 604 i2 += 1; 605 } 606 break; 607 case 2: 608 for (i=0; i<m; i++) { 609 v = aa + 4*ai[i]; 610 vi = aj + ai[i]; 611 nz = ai[i+1] - ai[i]; 612 s[0] = b[i2]; s[1] = b[i2+1]; 613 for (j=0; j<nz; j++) { 614 idx = 2*vi[j]; 615 it = 4*j; 616 xw[0] = x[idx]; xw[1] = x[1+idx]; 617 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 618 } 619 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 620 x[i2] += xw[0]; x[i2+1] += xw[1]; 621 idiag += 4; 622 i2 += 2; 623 } 624 break; 625 case 3: 626 for (i=0; i<m; i++) { 627 v = aa + 9*ai[i]; 628 vi = aj + ai[i]; 629 nz = ai[i+1] - ai[i]; 630 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 631 while (nz--) { 632 idx = 3*(*vi++); 633 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 634 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 635 v += 9; 636 } 637 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 638 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 639 idiag += 9; 640 i2 += 3; 641 } 642 break; 643 case 4: 644 for (i=0; i<m; i++) { 645 v = aa + 16*ai[i]; 646 vi = aj + ai[i]; 647 nz = ai[i+1] - ai[i]; 648 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 649 while (nz--) { 650 idx = 4*(*vi++); 651 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 652 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 653 v += 16; 654 } 655 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 656 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 657 idiag += 16; 658 i2 += 4; 659 } 660 break; 661 case 5: 662 for (i=0; i<m; i++) { 663 v = aa + 25*ai[i]; 664 vi = aj + ai[i]; 665 nz = ai[i+1] - ai[i]; 666 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 667 while (nz--) { 668 idx = 5*(*vi++); 669 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 670 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 671 v += 25; 672 } 673 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 674 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 675 idiag += 25; 676 i2 += 5; 677 } 678 break; 679 case 6: 680 for (i=0; i<m; i++) { 681 v = aa + 36*ai[i]; 682 vi = aj + ai[i]; 683 nz = ai[i+1] - ai[i]; 684 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 685 while (nz--) { 686 idx = 6*(*vi++); 687 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 688 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 689 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 690 v += 36; 691 } 692 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 693 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 694 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 695 idiag += 36; 696 i2 += 6; 697 } 698 break; 699 case 7: 700 for (i=0; i<m; i++) { 701 v = aa + 49*ai[i]; 702 vi = aj + ai[i]; 703 nz = ai[i+1] - ai[i]; 704 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 705 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 706 while (nz--) { 707 idx = 7*(*vi++); 708 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 709 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 710 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 711 v += 49; 712 } 713 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 714 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 715 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 716 idiag += 49; 717 i2 += 7; 718 } 719 break; 720 default: 721 for (i=0; i<m; i++) { 722 v = aa + bs2*ai[i]; 723 vi = aj + ai[i]; 724 nz = ai[i+1] - ai[i]; 725 726 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 727 /* copy all rows of x that are needed into contiguous space */ 728 workt = work; 729 for (j=0; j<nz; j++) { 730 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 731 workt += bs; 732 } 733 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 734 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 735 736 idiag += bs2; 737 i2 += bs; 738 } 739 break; 740 } 741 ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr); 742 } 743 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 744 idiag = a->idiag+bs2*(a->mbs-1); 745 i2 = bs * (m-1); 746 switch (bs) { 747 case 1: 748 for (i=m-1; i>=0; i--) { 749 v = aa + ai[i]; 750 vi = aj + ai[i]; 751 nz = ai[i+1] - ai[i]; 752 s[0] = b[i2]; 753 for (j=0; j<nz; j++) { 754 xw[0] = x[vi[j]]; 755 PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw); 756 } 757 PetscKernel_v_gets_A_times_w_1(xw,idiag,s); 758 x[i2] += xw[0]; 759 idiag -= 1; 760 i2 -= 1; 761 } 762 break; 763 case 2: 764 for (i=m-1; i>=0; i--) { 765 v = aa + 4*ai[i]; 766 vi = aj + ai[i]; 767 nz = ai[i+1] - ai[i]; 768 s[0] = b[i2]; s[1] = b[i2+1]; 769 for (j=0; j<nz; j++) { 770 idx = 2*vi[j]; 771 it = 4*j; 772 xw[0] = x[idx]; xw[1] = x[1+idx]; 773 PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw); 774 } 775 PetscKernel_v_gets_A_times_w_2(xw,idiag,s); 776 x[i2] += xw[0]; x[i2+1] += xw[1]; 777 idiag -= 4; 778 i2 -= 2; 779 } 780 break; 781 case 3: 782 for (i=m-1; i>=0; i--) { 783 v = aa + 9*ai[i]; 784 vi = aj + ai[i]; 785 nz = ai[i+1] - ai[i]; 786 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 787 while (nz--) { 788 idx = 3*(*vi++); 789 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 790 PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw); 791 v += 9; 792 } 793 PetscKernel_v_gets_A_times_w_3(xw,idiag,s); 794 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 795 idiag -= 9; 796 i2 -= 3; 797 } 798 break; 799 case 4: 800 for (i=m-1; i>=0; i--) { 801 v = aa + 16*ai[i]; 802 vi = aj + ai[i]; 803 nz = ai[i+1] - ai[i]; 804 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; 805 while (nz--) { 806 idx = 4*(*vi++); 807 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; 808 PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw); 809 v += 16; 810 } 811 PetscKernel_v_gets_A_times_w_4(xw,idiag,s); 812 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; 813 idiag -= 16; 814 i2 -= 4; 815 } 816 break; 817 case 5: 818 for (i=m-1; i>=0; i--) { 819 v = aa + 25*ai[i]; 820 vi = aj + ai[i]; 821 nz = ai[i+1] - ai[i]; 822 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; 823 while (nz--) { 824 idx = 5*(*vi++); 825 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx]; 826 PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw); 827 v += 25; 828 } 829 PetscKernel_v_gets_A_times_w_5(xw,idiag,s); 830 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4]; 831 idiag -= 25; 832 i2 -= 5; 833 } 834 break; 835 case 6: 836 for (i=m-1; i>=0; i--) { 837 v = aa + 36*ai[i]; 838 vi = aj + ai[i]; 839 nz = ai[i+1] - ai[i]; 840 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; 841 while (nz--) { 842 idx = 6*(*vi++); 843 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 844 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; 845 PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw); 846 v += 36; 847 } 848 PetscKernel_v_gets_A_times_w_6(xw,idiag,s); 849 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 850 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; 851 idiag -= 36; 852 i2 -= 6; 853 } 854 break; 855 case 7: 856 for (i=m-1; i>=0; i--) { 857 v = aa + 49*ai[i]; 858 vi = aj + ai[i]; 859 nz = ai[i+1] - ai[i]; 860 s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; 861 s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6]; 862 while (nz--) { 863 idx = 7*(*vi++); 864 xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; 865 xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx]; 866 PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw); 867 v += 49; 868 } 869 PetscKernel_v_gets_A_times_w_7(xw,idiag,s); 870 x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; 871 x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6]; 872 idiag -= 49; 873 i2 -= 7; 874 } 875 break; 876 default: 877 for (i=m-1; i>=0; i--) { 878 v = aa + bs2*ai[i]; 879 vi = aj + ai[i]; 880 nz = ai[i+1] - ai[i]; 881 882 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 883 /* copy all rows of x that are needed into contiguous space */ 884 workt = work; 885 for (j=0; j<nz; j++) { 886 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 887 workt += bs; 888 } 889 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work); 890 PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2); 891 892 idiag -= bs2; 893 i2 -= bs; 894 } 895 break; 896 } 897 ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr); 898 } 899 } 900 ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr); 901 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 902 PetscFunctionReturn(0); 903 } 904 905 906 /* 907 Special version for direct calls from Fortran (Used in PETSc-fun3d) 908 */ 909 #if defined(PETSC_HAVE_FORTRAN_CAPS) 910 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4 911 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 912 #define matsetvaluesblocked4_ matsetvaluesblocked4 913 #endif 914 915 #undef __FUNCT__ 916 #define __FUNCT__ "matsetvaluesblocked4_" 917 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[]) 918 { 919 Mat A = *AA; 920 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 921 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn; 922 PetscInt *ai =a->i,*ailen=a->ilen; 923 PetscInt *aj =a->j,stepval,lastcol = -1; 924 const PetscScalar *value = v; 925 MatScalar *ap,*aa = a->a,*bap; 926 927 PetscFunctionBegin; 928 if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4"); 929 stepval = (n-1)*4; 930 for (k=0; k<m; k++) { /* loop over added rows */ 931 row = im[k]; 932 rp = aj + ai[row]; 933 ap = aa + 16*ai[row]; 934 nrow = ailen[row]; 935 low = 0; 936 high = nrow; 937 for (l=0; l<n; l++) { /* loop over added columns */ 938 col = in[l]; 939 if (col <= lastcol) low = 0; 940 else high = nrow; 941 lastcol = col; 942 value = v + k*(stepval+4 + l)*4; 943 while (high-low > 7) { 944 t = (low+high)/2; 945 if (rp[t] > col) high = t; 946 else low = t; 947 } 948 for (i=low; i<high; i++) { 949 if (rp[i] > col) break; 950 if (rp[i] == col) { 951 bap = ap + 16*i; 952 for (ii=0; ii<4; ii++,value+=stepval) { 953 for (jj=ii; jj<16; jj+=4) { 954 bap[jj] += *value++; 955 } 956 } 957 goto noinsert2; 958 } 959 } 960 N = nrow++ - 1; 961 high++; /* added new column index thus must search to one higher than before */ 962 /* shift up all the later entries in this row */ 963 for (ii=N; ii>=i; ii--) { 964 rp[ii+1] = rp[ii]; 965 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 966 } 967 if (N >= i) { 968 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 969 } 970 rp[i] = col; 971 bap = ap + 16*i; 972 for (ii=0; ii<4; ii++,value+=stepval) { 973 for (jj=ii; jj<16; jj+=4) { 974 bap[jj] = *value++; 975 } 976 } 977 noinsert2:; 978 low = i; 979 } 980 ailen[row] = nrow; 981 } 982 PetscFunctionReturnVoid(); 983 } 984 985 #if defined(PETSC_HAVE_FORTRAN_CAPS) 986 #define matsetvalues4_ MATSETVALUES4 987 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 988 #define matsetvalues4_ matsetvalues4 989 #endif 990 991 #undef __FUNCT__ 992 #define __FUNCT__ "MatSetValues4_" 993 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v) 994 { 995 Mat A = *AA; 996 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 997 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm; 998 PetscInt *ai=a->i,*ailen=a->ilen; 999 PetscInt *aj=a->j,brow,bcol; 1000 PetscInt ridx,cidx,lastcol = -1; 1001 MatScalar *ap,value,*aa=a->a,*bap; 1002 1003 PetscFunctionBegin; 1004 for (k=0; k<m; k++) { /* loop over added rows */ 1005 row = im[k]; brow = row/4; 1006 rp = aj + ai[brow]; 1007 ap = aa + 16*ai[brow]; 1008 nrow = ailen[brow]; 1009 low = 0; 1010 high = nrow; 1011 for (l=0; l<n; l++) { /* loop over added columns */ 1012 col = in[l]; bcol = col/4; 1013 ridx = row % 4; cidx = col % 4; 1014 value = v[l + k*n]; 1015 if (col <= lastcol) low = 0; 1016 else high = nrow; 1017 lastcol = col; 1018 while (high-low > 7) { 1019 t = (low+high)/2; 1020 if (rp[t] > bcol) high = t; 1021 else low = t; 1022 } 1023 for (i=low; i<high; i++) { 1024 if (rp[i] > bcol) break; 1025 if (rp[i] == bcol) { 1026 bap = ap + 16*i + 4*cidx + ridx; 1027 *bap += value; 1028 goto noinsert1; 1029 } 1030 } 1031 N = nrow++ - 1; 1032 high++; /* added new column thus must search to one higher than before */ 1033 /* shift up all the later entries in this row */ 1034 for (ii=N; ii>=i; ii--) { 1035 rp[ii+1] = rp[ii]; 1036 PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar)); 1037 } 1038 if (N>=i) { 1039 PetscMemzero(ap+16*i,16*sizeof(MatScalar)); 1040 } 1041 rp[i] = bcol; 1042 ap[16*i + 4*cidx + ridx] = value; 1043 noinsert1:; 1044 low = i; 1045 } 1046 ailen[brow] = nrow; 1047 } 1048 PetscFunctionReturnVoid(); 1049 } 1050 1051 /* 1052 Checks for missing diagonals 1053 */ 1054 #undef __FUNCT__ 1055 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ" 1056 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool *missing,PetscInt *d) 1057 { 1058 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1059 PetscErrorCode ierr; 1060 PetscInt *diag,*ii = a->i,i; 1061 1062 PetscFunctionBegin; 1063 ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr); 1064 *missing = PETSC_FALSE; 1065 if (A->rmap->n > 0 && !ii) { 1066 *missing = PETSC_TRUE; 1067 if (d) *d = 0; 1068 PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n"); 1069 } else { 1070 diag = a->diag; 1071 for (i=0; i<a->mbs; i++) { 1072 if (diag[i] >= ii[i+1]) { 1073 *missing = PETSC_TRUE; 1074 if (d) *d = i; 1075 PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i); 1076 break; 1077 } 1078 } 1079 } 1080 PetscFunctionReturn(0); 1081 } 1082 1083 #undef __FUNCT__ 1084 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ" 1085 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A) 1086 { 1087 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1088 PetscErrorCode ierr; 1089 PetscInt i,j,m = a->mbs; 1090 1091 PetscFunctionBegin; 1092 if (!a->diag) { 1093 ierr = PetscMalloc1(m,&a->diag);CHKERRQ(ierr); 1094 ierr = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr); 1095 a->free_diag = PETSC_TRUE; 1096 } 1097 for (i=0; i<m; i++) { 1098 a->diag[i] = a->i[i+1]; 1099 for (j=a->i[i]; j<a->i[i+1]; j++) { 1100 if (a->j[j] == i) { 1101 a->diag[i] = j; 1102 break; 1103 } 1104 } 1105 } 1106 PetscFunctionReturn(0); 1107 } 1108 1109 1110 #undef __FUNCT__ 1111 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ" 1112 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool *done) 1113 { 1114 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1115 PetscErrorCode ierr; 1116 PetscInt i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt; 1117 PetscInt **ia = (PetscInt**)inia,**ja = (PetscInt**)inja; 1118 1119 PetscFunctionBegin; 1120 *nn = n; 1121 if (!ia) PetscFunctionReturn(0); 1122 if (symmetric) { 1123 ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr); 1124 nz = tia[n]; 1125 } else { 1126 tia = a->i; tja = a->j; 1127 } 1128 1129 if (!blockcompressed && bs > 1) { 1130 (*nn) *= bs; 1131 /* malloc & create the natural set of indices */ 1132 ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr); 1133 if (n) { 1134 (*ia)[0] = 0; 1135 for (j=1; j<bs; j++) { 1136 (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1]; 1137 } 1138 } 1139 1140 for (i=1; i<n; i++) { 1141 (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1]; 1142 for (j=1; j<bs; j++) { 1143 (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1]; 1144 } 1145 } 1146 if (n) { 1147 (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1]; 1148 } 1149 1150 if (inja) { 1151 ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr); 1152 cnt = 0; 1153 for (i=0; i<n; i++) { 1154 for (j=0; j<bs; j++) { 1155 for (k=tia[i]; k<tia[i+1]; k++) { 1156 for (l=0; l<bs; l++) { 1157 (*ja)[cnt++] = bs*tja[k] + l; 1158 } 1159 } 1160 } 1161 } 1162 } 1163 1164 if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */ 1165 ierr = PetscFree(tia);CHKERRQ(ierr); 1166 ierr = PetscFree(tja);CHKERRQ(ierr); 1167 } 1168 } else if (oshift == 1) { 1169 if (symmetric) { 1170 nz = tia[A->rmap->n/bs]; 1171 /* add 1 to i and j indices */ 1172 for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1; 1173 *ia = tia; 1174 if (ja) { 1175 for (i=0; i<nz; i++) tja[i] = tja[i] + 1; 1176 *ja = tja; 1177 } 1178 } else { 1179 nz = a->i[A->rmap->n/bs]; 1180 /* malloc space and add 1 to i and j indices */ 1181 ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr); 1182 for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1; 1183 if (ja) { 1184 ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr); 1185 for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1; 1186 } 1187 } 1188 } else { 1189 *ia = tia; 1190 if (ja) *ja = tja; 1191 } 1192 PetscFunctionReturn(0); 1193 } 1194 1195 #undef __FUNCT__ 1196 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ" 1197 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 1198 { 1199 PetscErrorCode ierr; 1200 1201 PetscFunctionBegin; 1202 if (!ia) PetscFunctionReturn(0); 1203 if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) { 1204 ierr = PetscFree(*ia);CHKERRQ(ierr); 1205 if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);} 1206 } 1207 PetscFunctionReturn(0); 1208 } 1209 1210 #undef __FUNCT__ 1211 #define __FUNCT__ "MatDestroy_SeqBAIJ" 1212 PetscErrorCode MatDestroy_SeqBAIJ(Mat A) 1213 { 1214 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1215 PetscErrorCode ierr; 1216 1217 PetscFunctionBegin; 1218 #if defined(PETSC_USE_LOG) 1219 PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz); 1220 #endif 1221 ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr); 1222 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 1223 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 1224 if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);} 1225 ierr = PetscFree(a->idiag);CHKERRQ(ierr); 1226 if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);} 1227 ierr = PetscFree(a->solve_work);CHKERRQ(ierr); 1228 ierr = PetscFree(a->mult_work);CHKERRQ(ierr); 1229 ierr = PetscFree(a->sor_workt);CHKERRQ(ierr); 1230 ierr = PetscFree(a->sor_work);CHKERRQ(ierr); 1231 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 1232 ierr = PetscFree(a->saved_values);CHKERRQ(ierr); 1233 ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr); 1234 1235 ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr); 1236 ierr = MatDestroy(&a->parent);CHKERRQ(ierr); 1237 ierr = PetscFree(A->data);CHKERRQ(ierr); 1238 1239 ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr); 1240 ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr); 1241 ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr); 1242 ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr); 1243 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr); 1244 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr); 1245 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr); 1246 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr); 1247 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr); 1248 ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr); 1249 ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr); 1250 PetscFunctionReturn(0); 1251 } 1252 1253 #undef __FUNCT__ 1254 #define __FUNCT__ "MatSetOption_SeqBAIJ" 1255 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg) 1256 { 1257 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1258 PetscErrorCode ierr; 1259 1260 PetscFunctionBegin; 1261 switch (op) { 1262 case MAT_ROW_ORIENTED: 1263 a->roworiented = flg; 1264 break; 1265 case MAT_KEEP_NONZERO_PATTERN: 1266 a->keepnonzeropattern = flg; 1267 break; 1268 case MAT_NEW_NONZERO_LOCATIONS: 1269 a->nonew = (flg ? 0 : 1); 1270 break; 1271 case MAT_NEW_NONZERO_LOCATION_ERR: 1272 a->nonew = (flg ? -1 : 0); 1273 break; 1274 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1275 a->nonew = (flg ? -2 : 0); 1276 break; 1277 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1278 a->nounused = (flg ? -1 : 0); 1279 break; 1280 case MAT_NEW_DIAGONALS: 1281 case MAT_IGNORE_OFF_PROC_ENTRIES: 1282 case MAT_USE_HASH_TABLE: 1283 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 1284 break; 1285 case MAT_SPD: 1286 case MAT_SYMMETRIC: 1287 case MAT_STRUCTURALLY_SYMMETRIC: 1288 case MAT_HERMITIAN: 1289 case MAT_SYMMETRY_ETERNAL: 1290 /* These options are handled directly by MatSetOption() */ 1291 break; 1292 default: 1293 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op); 1294 } 1295 PetscFunctionReturn(0); 1296 } 1297 1298 /* used for both SeqBAIJ and SeqSBAIJ matrices */ 1299 #undef __FUNCT__ 1300 #define __FUNCT__ "MatGetRow_SeqBAIJ_private" 1301 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa) 1302 { 1303 PetscErrorCode ierr; 1304 PetscInt itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2; 1305 MatScalar *aa_i; 1306 PetscScalar *v_i; 1307 1308 PetscFunctionBegin; 1309 bs = A->rmap->bs; 1310 bs2 = bs*bs; 1311 if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row); 1312 1313 bn = row/bs; /* Block number */ 1314 bp = row % bs; /* Block Position */ 1315 M = ai[bn+1] - ai[bn]; 1316 *nz = bs*M; 1317 1318 if (v) { 1319 *v = 0; 1320 if (*nz) { 1321 ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr); 1322 for (i=0; i<M; i++) { /* for each block in the block row */ 1323 v_i = *v + i*bs; 1324 aa_i = aa + bs2*(ai[bn] + i); 1325 for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j]; 1326 } 1327 } 1328 } 1329 1330 if (idx) { 1331 *idx = 0; 1332 if (*nz) { 1333 ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr); 1334 for (i=0; i<M; i++) { /* for each block in the block row */ 1335 idx_i = *idx + i*bs; 1336 itmp = bs*aj[ai[bn] + i]; 1337 for (j=0; j<bs; j++) idx_i[j] = itmp++; 1338 } 1339 } 1340 } 1341 PetscFunctionReturn(0); 1342 } 1343 1344 #undef __FUNCT__ 1345 #define __FUNCT__ "MatGetRow_SeqBAIJ" 1346 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1347 { 1348 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1349 PetscErrorCode ierr; 1350 1351 PetscFunctionBegin; 1352 ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr); 1353 PetscFunctionReturn(0); 1354 } 1355 1356 #undef __FUNCT__ 1357 #define __FUNCT__ "MatRestoreRow_SeqBAIJ" 1358 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1359 { 1360 PetscErrorCode ierr; 1361 1362 PetscFunctionBegin; 1363 if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);} 1364 if (v) {ierr = PetscFree(*v);CHKERRQ(ierr);} 1365 PetscFunctionReturn(0); 1366 } 1367 1368 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode); 1369 1370 #undef __FUNCT__ 1371 #define __FUNCT__ "MatTranspose_SeqBAIJ" 1372 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B) 1373 { 1374 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1375 Mat C; 1376 PetscErrorCode ierr; 1377 PetscInt i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col; 1378 PetscInt *rows,*cols,bs2=a->bs2; 1379 MatScalar *array; 1380 1381 PetscFunctionBegin; 1382 if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place"); 1383 if (reuse == MAT_INITIAL_MATRIX || A == *B) { 1384 ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr); 1385 1386 for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1; 1387 ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr); 1388 ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr); 1389 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 1390 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr); 1391 ierr = PetscFree(col);CHKERRQ(ierr); 1392 } else { 1393 C = *B; 1394 } 1395 1396 array = a->a; 1397 ierr = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr); 1398 for (i=0; i<mbs; i++) { 1399 cols[0] = i*bs; 1400 for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1; 1401 len = ai[i+1] - ai[i]; 1402 for (j=0; j<len; j++) { 1403 rows[0] = (*aj++)*bs; 1404 for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1; 1405 ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr); 1406 array += bs2; 1407 } 1408 } 1409 ierr = PetscFree2(rows,cols);CHKERRQ(ierr); 1410 1411 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1412 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1413 1414 if (reuse == MAT_INITIAL_MATRIX || *B != A) { 1415 *B = C; 1416 } else { 1417 ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr); 1418 } 1419 PetscFunctionReturn(0); 1420 } 1421 1422 #undef __FUNCT__ 1423 #define __FUNCT__ "MatIsTranspose_SeqBAIJ" 1424 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool *f) 1425 { 1426 PetscErrorCode ierr; 1427 Mat Btrans; 1428 1429 PetscFunctionBegin; 1430 *f = PETSC_FALSE; 1431 ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr); 1432 ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr); 1433 ierr = MatDestroy(&Btrans);CHKERRQ(ierr); 1434 PetscFunctionReturn(0); 1435 } 1436 1437 #undef __FUNCT__ 1438 #define __FUNCT__ "MatView_SeqBAIJ_Binary" 1439 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer) 1440 { 1441 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1442 PetscErrorCode ierr; 1443 PetscInt i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2; 1444 int fd; 1445 PetscScalar *aa; 1446 FILE *file; 1447 1448 PetscFunctionBegin; 1449 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 1450 ierr = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr); 1451 col_lens[0] = MAT_FILE_CLASSID; 1452 1453 col_lens[1] = A->rmap->N; 1454 col_lens[2] = A->cmap->n; 1455 col_lens[3] = a->nz*bs2; 1456 1457 /* store lengths of each row and write (including header) to file */ 1458 count = 0; 1459 for (i=0; i<a->mbs; i++) { 1460 for (j=0; j<bs; j++) { 1461 col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]); 1462 } 1463 } 1464 ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr); 1465 ierr = PetscFree(col_lens);CHKERRQ(ierr); 1466 1467 /* store column indices (zero start index) */ 1468 ierr = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr); 1469 count = 0; 1470 for (i=0; i<a->mbs; i++) { 1471 for (j=0; j<bs; j++) { 1472 for (k=a->i[i]; k<a->i[i+1]; k++) { 1473 for (l=0; l<bs; l++) { 1474 jj[count++] = bs*a->j[k] + l; 1475 } 1476 } 1477 } 1478 } 1479 ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr); 1480 ierr = PetscFree(jj);CHKERRQ(ierr); 1481 1482 /* store nonzero values */ 1483 ierr = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr); 1484 count = 0; 1485 for (i=0; i<a->mbs; i++) { 1486 for (j=0; j<bs; j++) { 1487 for (k=a->i[i]; k<a->i[i+1]; k++) { 1488 for (l=0; l<bs; l++) { 1489 aa[count++] = a->a[bs2*k + l*bs + j]; 1490 } 1491 } 1492 } 1493 } 1494 ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr); 1495 ierr = PetscFree(aa);CHKERRQ(ierr); 1496 1497 ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr); 1498 if (file) { 1499 fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs); 1500 } 1501 PetscFunctionReturn(0); 1502 } 1503 1504 #undef __FUNCT__ 1505 #define __FUNCT__ "MatView_SeqBAIJ_ASCII" 1506 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer) 1507 { 1508 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1509 PetscErrorCode ierr; 1510 PetscInt i,j,bs = A->rmap->bs,k,l,bs2=a->bs2; 1511 PetscViewerFormat format; 1512 1513 PetscFunctionBegin; 1514 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1515 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1516 ierr = PetscViewerASCIIPrintf(viewer," block size is %D\n",bs);CHKERRQ(ierr); 1517 } else if (format == PETSC_VIEWER_ASCII_MATLAB) { 1518 const char *matname; 1519 Mat aij; 1520 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr); 1521 ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr); 1522 ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr); 1523 ierr = MatView(aij,viewer);CHKERRQ(ierr); 1524 ierr = MatDestroy(&aij);CHKERRQ(ierr); 1525 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1526 PetscFunctionReturn(0); 1527 } else if (format == PETSC_VIEWER_ASCII_COMMON) { 1528 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1529 for (i=0; i<a->mbs; i++) { 1530 for (j=0; j<bs; j++) { 1531 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1532 for (k=a->i[i]; k<a->i[i+1]; k++) { 1533 for (l=0; l<bs; l++) { 1534 #if defined(PETSC_USE_COMPLEX) 1535 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1536 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l, 1537 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1538 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1539 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l, 1540 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1541 } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) { 1542 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1543 } 1544 #else 1545 if (a->a[bs2*k + l*bs + j] != 0.0) { 1546 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1547 } 1548 #endif 1549 } 1550 } 1551 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1552 } 1553 } 1554 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1555 } else { 1556 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr); 1557 for (i=0; i<a->mbs; i++) { 1558 for (j=0; j<bs; j++) { 1559 ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr); 1560 for (k=a->i[i]; k<a->i[i+1]; k++) { 1561 for (l=0; l<bs; l++) { 1562 #if defined(PETSC_USE_COMPLEX) 1563 if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) { 1564 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l, 1565 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1566 } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) { 1567 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l, 1568 (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1569 } else { 1570 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr); 1571 } 1572 #else 1573 ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr); 1574 #endif 1575 } 1576 } 1577 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 1578 } 1579 } 1580 ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr); 1581 } 1582 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 1583 PetscFunctionReturn(0); 1584 } 1585 1586 #include <petscdraw.h> 1587 #undef __FUNCT__ 1588 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom" 1589 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa) 1590 { 1591 Mat A = (Mat) Aa; 1592 Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)A->data; 1593 PetscErrorCode ierr; 1594 PetscInt row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2; 1595 PetscReal xl,yl,xr,yr,x_l,x_r,y_l,y_r; 1596 MatScalar *aa; 1597 PetscViewer viewer; 1598 PetscViewerFormat format; 1599 1600 PetscFunctionBegin; 1601 ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr); 1602 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 1603 1604 ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr); 1605 1606 /* loop over matrix elements drawing boxes */ 1607 1608 if (format != PETSC_VIEWER_DRAW_CONTOUR) { 1609 color = PETSC_DRAW_BLUE; 1610 for (i=0,row=0; i<mbs; i++,row+=bs) { 1611 for (j=a->i[i]; j<a->i[i+1]; j++) { 1612 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1613 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1614 aa = a->a + j*bs2; 1615 for (k=0; k<bs; k++) { 1616 for (l=0; l<bs; l++) { 1617 if (PetscRealPart(*aa++) >= 0.) continue; 1618 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1619 } 1620 } 1621 } 1622 } 1623 color = PETSC_DRAW_CYAN; 1624 for (i=0,row=0; i<mbs; i++,row+=bs) { 1625 for (j=a->i[i]; j<a->i[i+1]; j++) { 1626 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1627 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1628 aa = a->a + j*bs2; 1629 for (k=0; k<bs; k++) { 1630 for (l=0; l<bs; l++) { 1631 if (PetscRealPart(*aa++) != 0.) continue; 1632 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1633 } 1634 } 1635 } 1636 } 1637 color = PETSC_DRAW_RED; 1638 for (i=0,row=0; i<mbs; i++,row+=bs) { 1639 for (j=a->i[i]; j<a->i[i+1]; j++) { 1640 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1641 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1642 aa = a->a + j*bs2; 1643 for (k=0; k<bs; k++) { 1644 for (l=0; l<bs; l++) { 1645 if (PetscRealPart(*aa++) <= 0.) continue; 1646 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1647 } 1648 } 1649 } 1650 } 1651 } else { 1652 /* use contour shading to indicate magnitude of values */ 1653 /* first determine max of all nonzero values */ 1654 PetscDraw popup; 1655 PetscReal scale,maxv = 0.0; 1656 1657 for (i=0; i<a->nz*a->bs2; i++) { 1658 if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]); 1659 } 1660 scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv; 1661 ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr); 1662 if (popup) { 1663 ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr); 1664 } 1665 for (i=0,row=0; i<mbs; i++,row+=bs) { 1666 for (j=a->i[i]; j<a->i[i+1]; j++) { 1667 y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0; 1668 x_l = a->j[j]*bs; x_r = x_l + 1.0; 1669 aa = a->a + j*bs2; 1670 for (k=0; k<bs; k++) { 1671 for (l=0; l<bs; l++) { 1672 color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++)); 1673 ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr); 1674 } 1675 } 1676 } 1677 } 1678 } 1679 PetscFunctionReturn(0); 1680 } 1681 1682 #undef __FUNCT__ 1683 #define __FUNCT__ "MatView_SeqBAIJ_Draw" 1684 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer) 1685 { 1686 PetscErrorCode ierr; 1687 PetscReal xl,yl,xr,yr,w,h; 1688 PetscDraw draw; 1689 PetscBool isnull; 1690 1691 PetscFunctionBegin; 1692 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 1693 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0); 1694 1695 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr); 1696 xr = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0; 1697 xr += w; yr += h; xl = -w; yl = -h; 1698 ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr); 1699 ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr); 1700 ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr); 1701 PetscFunctionReturn(0); 1702 } 1703 1704 #undef __FUNCT__ 1705 #define __FUNCT__ "MatView_SeqBAIJ" 1706 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer) 1707 { 1708 PetscErrorCode ierr; 1709 PetscBool iascii,isbinary,isdraw; 1710 1711 PetscFunctionBegin; 1712 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 1713 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 1714 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 1715 if (iascii) { 1716 ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr); 1717 } else if (isbinary) { 1718 ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr); 1719 } else if (isdraw) { 1720 ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr); 1721 } else { 1722 Mat B; 1723 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 1724 ierr = MatView(B,viewer);CHKERRQ(ierr); 1725 ierr = MatDestroy(&B);CHKERRQ(ierr); 1726 } 1727 PetscFunctionReturn(0); 1728 } 1729 1730 1731 #undef __FUNCT__ 1732 #define __FUNCT__ "MatGetValues_SeqBAIJ" 1733 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[]) 1734 { 1735 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1736 PetscInt *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j; 1737 PetscInt *ai = a->i,*ailen = a->ilen; 1738 PetscInt brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2; 1739 MatScalar *ap,*aa = a->a; 1740 1741 PetscFunctionBegin; 1742 for (k=0; k<m; k++) { /* loop over rows */ 1743 row = im[k]; brow = row/bs; 1744 if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 1745 if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row); 1746 rp = aj + ai[brow]; ap = aa + bs2*ai[brow]; 1747 nrow = ailen[brow]; 1748 for (l=0; l<n; l++) { /* loop over columns */ 1749 if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 1750 if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]); 1751 col = in[l]; 1752 bcol = col/bs; 1753 cidx = col%bs; 1754 ridx = row%bs; 1755 high = nrow; 1756 low = 0; /* assume unsorted */ 1757 while (high-low > 5) { 1758 t = (low+high)/2; 1759 if (rp[t] > bcol) high = t; 1760 else low = t; 1761 } 1762 for (i=low; i<high; i++) { 1763 if (rp[i] > bcol) break; 1764 if (rp[i] == bcol) { 1765 *v++ = ap[bs2*i+bs*cidx+ridx]; 1766 goto finished; 1767 } 1768 } 1769 *v++ = 0.0; 1770 finished:; 1771 } 1772 } 1773 PetscFunctionReturn(0); 1774 } 1775 1776 #undef __FUNCT__ 1777 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ" 1778 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 1779 { 1780 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1781 PetscInt *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1; 1782 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 1783 PetscErrorCode ierr; 1784 PetscInt *aj =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval; 1785 PetscBool roworiented=a->roworiented; 1786 const PetscScalar *value = v; 1787 MatScalar *ap,*aa = a->a,*bap; 1788 1789 PetscFunctionBegin; 1790 if (roworiented) { 1791 stepval = (n-1)*bs; 1792 } else { 1793 stepval = (m-1)*bs; 1794 } 1795 for (k=0; k<m; k++) { /* loop over added rows */ 1796 row = im[k]; 1797 if (row < 0) continue; 1798 #if defined(PETSC_USE_DEBUG) 1799 if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1); 1800 #endif 1801 rp = aj + ai[row]; 1802 ap = aa + bs2*ai[row]; 1803 rmax = imax[row]; 1804 nrow = ailen[row]; 1805 low = 0; 1806 high = nrow; 1807 for (l=0; l<n; l++) { /* loop over added columns */ 1808 if (in[l] < 0) continue; 1809 #if defined(PETSC_USE_DEBUG) 1810 if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1); 1811 #endif 1812 col = in[l]; 1813 if (roworiented) { 1814 value = v + (k*(stepval+bs) + l)*bs; 1815 } else { 1816 value = v + (l*(stepval+bs) + k)*bs; 1817 } 1818 if (col <= lastcol) low = 0; 1819 else high = nrow; 1820 lastcol = col; 1821 while (high-low > 7) { 1822 t = (low+high)/2; 1823 if (rp[t] > col) high = t; 1824 else low = t; 1825 } 1826 for (i=low; i<high; i++) { 1827 if (rp[i] > col) break; 1828 if (rp[i] == col) { 1829 bap = ap + bs2*i; 1830 if (roworiented) { 1831 if (is == ADD_VALUES) { 1832 for (ii=0; ii<bs; ii++,value+=stepval) { 1833 for (jj=ii; jj<bs2; jj+=bs) { 1834 bap[jj] += *value++; 1835 } 1836 } 1837 } else { 1838 for (ii=0; ii<bs; ii++,value+=stepval) { 1839 for (jj=ii; jj<bs2; jj+=bs) { 1840 bap[jj] = *value++; 1841 } 1842 } 1843 } 1844 } else { 1845 if (is == ADD_VALUES) { 1846 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1847 for (jj=0; jj<bs; jj++) { 1848 bap[jj] += value[jj]; 1849 } 1850 bap += bs; 1851 } 1852 } else { 1853 for (ii=0; ii<bs; ii++,value+=bs+stepval) { 1854 for (jj=0; jj<bs; jj++) { 1855 bap[jj] = value[jj]; 1856 } 1857 bap += bs; 1858 } 1859 } 1860 } 1861 goto noinsert2; 1862 } 1863 } 1864 if (nonew == 1) goto noinsert2; 1865 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col); 1866 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 1867 N = nrow++ - 1; high++; 1868 /* shift up all the later entries in this row */ 1869 for (ii=N; ii>=i; ii--) { 1870 rp[ii+1] = rp[ii]; 1871 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 1872 } 1873 if (N >= i) { 1874 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1875 } 1876 rp[i] = col; 1877 bap = ap + bs2*i; 1878 if (roworiented) { 1879 for (ii=0; ii<bs; ii++,value+=stepval) { 1880 for (jj=ii; jj<bs2; jj+=bs) { 1881 bap[jj] = *value++; 1882 } 1883 } 1884 } else { 1885 for (ii=0; ii<bs; ii++,value+=stepval) { 1886 for (jj=0; jj<bs; jj++) { 1887 *bap++ = *value++; 1888 } 1889 } 1890 } 1891 noinsert2:; 1892 low = i; 1893 } 1894 ailen[row] = nrow; 1895 } 1896 PetscFunctionReturn(0); 1897 } 1898 1899 #undef __FUNCT__ 1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ" 1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode) 1902 { 1903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 1904 PetscInt fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax; 1905 PetscInt m = A->rmap->N,*ip,N,*ailen = a->ilen; 1906 PetscErrorCode ierr; 1907 PetscInt mbs = a->mbs,bs2 = a->bs2,rmax = 0; 1908 MatScalar *aa = a->a,*ap; 1909 PetscReal ratio=0.6; 1910 1911 PetscFunctionBegin; 1912 if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0); 1913 1914 if (m) rmax = ailen[0]; 1915 for (i=1; i<mbs; i++) { 1916 /* move each row back by the amount of empty slots (fshift) before it*/ 1917 fshift += imax[i-1] - ailen[i-1]; 1918 rmax = PetscMax(rmax,ailen[i]); 1919 if (fshift) { 1920 ip = aj + ai[i]; ap = aa + bs2*ai[i]; 1921 N = ailen[i]; 1922 for (j=0; j<N; j++) { 1923 ip[j-fshift] = ip[j]; 1924 1925 ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr); 1926 } 1927 } 1928 ai[i] = ai[i-1] + ailen[i-1]; 1929 } 1930 if (mbs) { 1931 fshift += imax[mbs-1] - ailen[mbs-1]; 1932 ai[mbs] = ai[mbs-1] + ailen[mbs-1]; 1933 } 1934 1935 /* reset ilen and imax for each row */ 1936 a->nonzerorowcnt = 0; 1937 for (i=0; i<mbs; i++) { 1938 ailen[i] = imax[i] = ai[i+1] - ai[i]; 1939 a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0); 1940 } 1941 a->nz = ai[mbs]; 1942 1943 /* diagonals may have moved, so kill the diagonal pointers */ 1944 a->idiagvalid = PETSC_FALSE; 1945 if (fshift && a->diag) { 1946 ierr = PetscFree(a->diag);CHKERRQ(ierr); 1947 ierr = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 1948 a->diag = 0; 1949 } 1950 if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2); 1951 ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr); 1952 ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr); 1953 ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr); 1954 1955 A->info.mallocs += a->reallocs; 1956 a->reallocs = 0; 1957 A->info.nz_unneeded = (PetscReal)fshift*bs2; 1958 a->rmax = rmax; 1959 1960 ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 /* 1965 This function returns an array of flags which indicate the locations of contiguous 1966 blocks that should be zeroed. for eg: if bs = 3 and is = [0,1,2,3,5,6,7,8,9] 1967 then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)] 1968 Assume: sizes should be long enough to hold all the values. 1969 */ 1970 #undef __FUNCT__ 1971 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks" 1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max) 1973 { 1974 PetscInt i,j,k,row; 1975 PetscBool flg; 1976 1977 PetscFunctionBegin; 1978 for (i=0,j=0; i<n; j++) { 1979 row = idx[i]; 1980 if (row%bs!=0) { /* Not the begining of a block */ 1981 sizes[j] = 1; 1982 i++; 1983 } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */ 1984 sizes[j] = 1; /* Also makes sure atleast 'bs' values exist for next else */ 1985 i++; 1986 } else { /* Begining of the block, so check if the complete block exists */ 1987 flg = PETSC_TRUE; 1988 for (k=1; k<bs; k++) { 1989 if (row+k != idx[i+k]) { /* break in the block */ 1990 flg = PETSC_FALSE; 1991 break; 1992 } 1993 } 1994 if (flg) { /* No break in the bs */ 1995 sizes[j] = bs; 1996 i += bs; 1997 } else { 1998 sizes[j] = 1; 1999 i++; 2000 } 2001 } 2002 } 2003 *bs_max = j; 2004 PetscFunctionReturn(0); 2005 } 2006 2007 #undef __FUNCT__ 2008 #define __FUNCT__ "MatZeroRows_SeqBAIJ" 2009 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2010 { 2011 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2012 PetscErrorCode ierr; 2013 PetscInt i,j,k,count,*rows; 2014 PetscInt bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max; 2015 PetscScalar zero = 0.0; 2016 MatScalar *aa; 2017 const PetscScalar *xx; 2018 PetscScalar *bb; 2019 2020 PetscFunctionBegin; 2021 /* fix right hand side if needed */ 2022 if (x && b) { 2023 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2024 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2025 for (i=0; i<is_n; i++) { 2026 bb[is_idx[i]] = diag*xx[is_idx[i]]; 2027 } 2028 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2029 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2030 } 2031 2032 /* Make a copy of the IS and sort it */ 2033 /* allocate memory for rows,sizes */ 2034 ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr); 2035 2036 /* copy IS values to rows, and sort them */ 2037 for (i=0; i<is_n; i++) rows[i] = is_idx[i]; 2038 ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr); 2039 2040 if (baij->keepnonzeropattern) { 2041 for (i=0; i<is_n; i++) sizes[i] = 1; 2042 bs_max = is_n; 2043 } else { 2044 ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr); 2045 A->nonzerostate++; 2046 } 2047 2048 for (i=0,j=0; i<bs_max; j+=sizes[i],i++) { 2049 row = rows[j]; 2050 if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row); 2051 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2052 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2053 if (sizes[i] == bs && !baij->keepnonzeropattern) { 2054 if (diag != (PetscScalar)0.0) { 2055 if (baij->ilen[row/bs] > 0) { 2056 baij->ilen[row/bs] = 1; 2057 baij->j[baij->i[row/bs]] = row/bs; 2058 2059 ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr); 2060 } 2061 /* Now insert all the diagonal values for this bs */ 2062 for (k=0; k<bs; k++) { 2063 ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr); 2064 } 2065 } else { /* (diag == 0.0) */ 2066 baij->ilen[row/bs] = 0; 2067 } /* end (diag == 0.0) */ 2068 } else { /* (sizes[i] != bs) */ 2069 #if defined(PETSC_USE_DEBUG) 2070 if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1"); 2071 #endif 2072 for (k=0; k<count; k++) { 2073 aa[0] = zero; 2074 aa += bs; 2075 } 2076 if (diag != (PetscScalar)0.0) { 2077 ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr); 2078 } 2079 } 2080 } 2081 2082 ierr = PetscFree2(rows,sizes);CHKERRQ(ierr); 2083 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2084 PetscFunctionReturn(0); 2085 } 2086 2087 #undef __FUNCT__ 2088 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ" 2089 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b) 2090 { 2091 Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data; 2092 PetscErrorCode ierr; 2093 PetscInt i,j,k,count; 2094 PetscInt bs =A->rmap->bs,bs2=baij->bs2,row,col; 2095 PetscScalar zero = 0.0; 2096 MatScalar *aa; 2097 const PetscScalar *xx; 2098 PetscScalar *bb; 2099 PetscBool *zeroed,vecs = PETSC_FALSE; 2100 2101 PetscFunctionBegin; 2102 /* fix right hand side if needed */ 2103 if (x && b) { 2104 ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr); 2105 ierr = VecGetArray(b,&bb);CHKERRQ(ierr); 2106 vecs = PETSC_TRUE; 2107 } 2108 2109 /* zero the columns */ 2110 ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr); 2111 for (i=0; i<is_n; i++) { 2112 if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]); 2113 zeroed[is_idx[i]] = PETSC_TRUE; 2114 } 2115 for (i=0; i<A->rmap->N; i++) { 2116 if (!zeroed[i]) { 2117 row = i/bs; 2118 for (j=baij->i[row]; j<baij->i[row+1]; j++) { 2119 for (k=0; k<bs; k++) { 2120 col = bs*baij->j[j] + k; 2121 if (zeroed[col]) { 2122 aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k; 2123 if (vecs) bb[i] -= aa[0]*xx[col]; 2124 aa[0] = 0.0; 2125 } 2126 } 2127 } 2128 } else if (vecs) bb[i] = diag*xx[i]; 2129 } 2130 ierr = PetscFree(zeroed);CHKERRQ(ierr); 2131 if (vecs) { 2132 ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr); 2133 ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr); 2134 } 2135 2136 /* zero the rows */ 2137 for (i=0; i<is_n; i++) { 2138 row = is_idx[i]; 2139 count = (baij->i[row/bs +1] - baij->i[row/bs])*bs; 2140 aa = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs); 2141 for (k=0; k<count; k++) { 2142 aa[0] = zero; 2143 aa += bs; 2144 } 2145 if (diag != (PetscScalar)0.0) { 2146 ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr); 2147 } 2148 } 2149 ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2150 PetscFunctionReturn(0); 2151 } 2152 2153 #undef __FUNCT__ 2154 #define __FUNCT__ "MatSetValues_SeqBAIJ" 2155 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is) 2156 { 2157 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2158 PetscInt *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1; 2159 PetscInt *imax=a->imax,*ai=a->i,*ailen=a->ilen; 2160 PetscInt *aj =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol; 2161 PetscErrorCode ierr; 2162 PetscInt ridx,cidx,bs2=a->bs2; 2163 PetscBool roworiented=a->roworiented; 2164 MatScalar *ap,value,*aa=a->a,*bap; 2165 2166 PetscFunctionBegin; 2167 for (k=0; k<m; k++) { /* loop over added rows */ 2168 row = im[k]; 2169 brow = row/bs; 2170 if (row < 0) continue; 2171 #if defined(PETSC_USE_DEBUG) 2172 if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1); 2173 #endif 2174 rp = aj + ai[brow]; 2175 ap = aa + bs2*ai[brow]; 2176 rmax = imax[brow]; 2177 nrow = ailen[brow]; 2178 low = 0; 2179 high = nrow; 2180 for (l=0; l<n; l++) { /* loop over added columns */ 2181 if (in[l] < 0) continue; 2182 #if defined(PETSC_USE_DEBUG) 2183 if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1); 2184 #endif 2185 col = in[l]; bcol = col/bs; 2186 ridx = row % bs; cidx = col % bs; 2187 if (roworiented) { 2188 value = v[l + k*n]; 2189 } else { 2190 value = v[k + l*m]; 2191 } 2192 if (col <= lastcol) low = 0; else high = nrow; 2193 lastcol = col; 2194 while (high-low > 7) { 2195 t = (low+high)/2; 2196 if (rp[t] > bcol) high = t; 2197 else low = t; 2198 } 2199 for (i=low; i<high; i++) { 2200 if (rp[i] > bcol) break; 2201 if (rp[i] == bcol) { 2202 bap = ap + bs2*i + bs*cidx + ridx; 2203 if (is == ADD_VALUES) *bap += value; 2204 else *bap = value; 2205 goto noinsert1; 2206 } 2207 } 2208 if (nonew == 1) goto noinsert1; 2209 if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); 2210 MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar); 2211 N = nrow++ - 1; high++; 2212 /* shift up all the later entries in this row */ 2213 for (ii=N; ii>=i; ii--) { 2214 rp[ii+1] = rp[ii]; 2215 ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); 2216 } 2217 if (N>=i) { 2218 ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr); 2219 } 2220 rp[i] = bcol; 2221 ap[bs2*i + bs*cidx + ridx] = value; 2222 a->nz++; 2223 A->nonzerostate++; 2224 noinsert1:; 2225 low = i; 2226 } 2227 ailen[brow] = nrow; 2228 } 2229 PetscFunctionReturn(0); 2230 } 2231 2232 #undef __FUNCT__ 2233 #define __FUNCT__ "MatILUFactor_SeqBAIJ" 2234 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info) 2235 { 2236 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data; 2237 Mat outA; 2238 PetscErrorCode ierr; 2239 PetscBool row_identity,col_identity; 2240 2241 PetscFunctionBegin; 2242 if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU"); 2243 ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr); 2244 ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr); 2245 if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU"); 2246 2247 outA = inA; 2248 inA->factortype = MAT_FACTOR_LU; 2249 2250 ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr); 2251 2252 ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr); 2253 ierr = ISDestroy(&a->row);CHKERRQ(ierr); 2254 a->row = row; 2255 ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr); 2256 ierr = ISDestroy(&a->col);CHKERRQ(ierr); 2257 a->col = col; 2258 2259 /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */ 2260 ierr = ISDestroy(&a->icol);CHKERRQ(ierr); 2261 ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr); 2262 ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr); 2263 2264 ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr); 2265 if (!a->solve_work) { 2266 ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr); 2267 ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr); 2268 } 2269 ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr); 2270 PetscFunctionReturn(0); 2271 } 2272 2273 #undef __FUNCT__ 2274 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ" 2275 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices) 2276 { 2277 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data; 2278 PetscInt i,nz,mbs; 2279 2280 PetscFunctionBegin; 2281 nz = baij->maxnz; 2282 mbs = baij->mbs; 2283 for (i=0; i<nz; i++) { 2284 baij->j[i] = indices[i]; 2285 } 2286 baij->nz = nz; 2287 for (i=0; i<mbs; i++) { 2288 baij->ilen[i] = baij->imax[i]; 2289 } 2290 PetscFunctionReturn(0); 2291 } 2292 2293 #undef __FUNCT__ 2294 #define __FUNCT__ "MatSeqBAIJSetColumnIndices" 2295 /*@ 2296 MatSeqBAIJSetColumnIndices - Set the column indices for all the rows 2297 in the matrix. 2298 2299 Input Parameters: 2300 + mat - the SeqBAIJ matrix 2301 - indices - the column indices 2302 2303 Level: advanced 2304 2305 Notes: 2306 This can be called if you have precomputed the nonzero structure of the 2307 matrix and want to provide it to the matrix object to improve the performance 2308 of the MatSetValues() operation. 2309 2310 You MUST have set the correct numbers of nonzeros per row in the call to 2311 MatCreateSeqBAIJ(), and the columns indices MUST be sorted. 2312 2313 MUST be called before any calls to MatSetValues(); 2314 2315 @*/ 2316 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices) 2317 { 2318 PetscErrorCode ierr; 2319 2320 PetscFunctionBegin; 2321 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2322 PetscValidPointer(indices,2); 2323 ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr); 2324 PetscFunctionReturn(0); 2325 } 2326 2327 #undef __FUNCT__ 2328 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ" 2329 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[]) 2330 { 2331 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2332 PetscErrorCode ierr; 2333 PetscInt i,j,n,row,bs,*ai,*aj,mbs; 2334 PetscReal atmp; 2335 PetscScalar *x,zero = 0.0; 2336 MatScalar *aa; 2337 PetscInt ncols,brow,krow,kcol; 2338 2339 PetscFunctionBegin; 2340 if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 2341 bs = A->rmap->bs; 2342 aa = a->a; 2343 ai = a->i; 2344 aj = a->j; 2345 mbs = a->mbs; 2346 2347 ierr = VecSet(v,zero);CHKERRQ(ierr); 2348 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 2349 ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr); 2350 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector"); 2351 for (i=0; i<mbs; i++) { 2352 ncols = ai[1] - ai[0]; ai++; 2353 brow = bs*i; 2354 for (j=0; j<ncols; j++) { 2355 for (kcol=0; kcol<bs; kcol++) { 2356 for (krow=0; krow<bs; krow++) { 2357 atmp = PetscAbsScalar(*aa);aa++; 2358 row = brow + krow; /* row index */ 2359 if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;} 2360 } 2361 } 2362 aj++; 2363 } 2364 } 2365 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #undef __FUNCT__ 2370 #define __FUNCT__ "MatCopy_SeqBAIJ" 2371 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str) 2372 { 2373 PetscErrorCode ierr; 2374 2375 PetscFunctionBegin; 2376 /* If the two matrices have the same copy implementation, use fast copy. */ 2377 if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) { 2378 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2379 Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data; 2380 PetscInt ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs; 2381 2382 if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]); 2383 if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs); 2384 ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr); 2385 } else { 2386 ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr); 2387 } 2388 PetscFunctionReturn(0); 2389 } 2390 2391 #undef __FUNCT__ 2392 #define __FUNCT__ "MatSetUp_SeqBAIJ" 2393 PetscErrorCode MatSetUp_SeqBAIJ(Mat A) 2394 { 2395 PetscErrorCode ierr; 2396 2397 PetscFunctionBegin; 2398 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr); 2399 PetscFunctionReturn(0); 2400 } 2401 2402 #undef __FUNCT__ 2403 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ" 2404 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2405 { 2406 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2407 2408 PetscFunctionBegin; 2409 *array = a->a; 2410 PetscFunctionReturn(0); 2411 } 2412 2413 #undef __FUNCT__ 2414 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ" 2415 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[]) 2416 { 2417 PetscFunctionBegin; 2418 PetscFunctionReturn(0); 2419 } 2420 2421 #undef __FUNCT__ 2422 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ" 2423 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz) 2424 { 2425 PetscInt bs = Y->rmap->bs,mbs = Y->rmap->N/bs; 2426 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data; 2427 Mat_SeqBAIJ *y = (Mat_SeqBAIJ*)Y->data; 2428 PetscErrorCode ierr; 2429 2430 PetscFunctionBegin; 2431 /* Set the number of nonzeros in the new matrix */ 2432 ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr); 2433 PetscFunctionReturn(0); 2434 } 2435 2436 #undef __FUNCT__ 2437 #define __FUNCT__ "MatAXPY_SeqBAIJ" 2438 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str) 2439 { 2440 Mat_SeqBAIJ *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data; 2441 PetscErrorCode ierr; 2442 PetscInt bs=Y->rmap->bs,bs2=bs*bs; 2443 PetscBLASInt one=1; 2444 2445 PetscFunctionBegin; 2446 if (str == SAME_NONZERO_PATTERN) { 2447 PetscScalar alpha = a; 2448 PetscBLASInt bnz; 2449 ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr); 2450 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one)); 2451 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 2452 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2453 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 2454 } else { 2455 Mat B; 2456 PetscInt *nnz; 2457 if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size"); 2458 ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr); 2459 ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr); 2460 ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr); 2461 ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr); 2462 ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr); 2463 ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr); 2464 ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr); 2465 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2466 ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr); 2467 ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr); 2468 ierr = PetscFree(nnz);CHKERRQ(ierr); 2469 } 2470 PetscFunctionReturn(0); 2471 } 2472 2473 #undef __FUNCT__ 2474 #define __FUNCT__ "MatRealPart_SeqBAIJ" 2475 PetscErrorCode MatRealPart_SeqBAIJ(Mat A) 2476 { 2477 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2478 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2479 MatScalar *aa = a->a; 2480 2481 PetscFunctionBegin; 2482 for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 #undef __FUNCT__ 2487 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ" 2488 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A) 2489 { 2490 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2491 PetscInt i,nz = a->bs2*a->i[a->mbs]; 2492 MatScalar *aa = a->a; 2493 2494 PetscFunctionBegin; 2495 for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]); 2496 PetscFunctionReturn(0); 2497 } 2498 2499 #undef __FUNCT__ 2500 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ" 2501 /* 2502 Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code 2503 */ 2504 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2505 { 2506 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2507 PetscErrorCode ierr; 2508 PetscInt bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs; 2509 PetscInt nz = a->i[m],row,*jj,mr,col; 2510 2511 PetscFunctionBegin; 2512 *nn = n; 2513 if (!ia) PetscFunctionReturn(0); 2514 if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices"); 2515 else { 2516 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2517 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2518 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2519 jj = a->j; 2520 for (i=0; i<nz; i++) { 2521 collengths[jj[i]]++; 2522 } 2523 cia[0] = oshift; 2524 for (i=0; i<n; i++) { 2525 cia[i+1] = cia[i] + collengths[i]; 2526 } 2527 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2528 jj = a->j; 2529 for (row=0; row<m; row++) { 2530 mr = a->i[row+1] - a->i[row]; 2531 for (i=0; i<mr; i++) { 2532 col = *jj++; 2533 2534 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2535 } 2536 } 2537 ierr = PetscFree(collengths);CHKERRQ(ierr); 2538 *ia = cia; *ja = cja; 2539 } 2540 PetscFunctionReturn(0); 2541 } 2542 2543 #undef __FUNCT__ 2544 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ" 2545 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool *done) 2546 { 2547 PetscErrorCode ierr; 2548 2549 PetscFunctionBegin; 2550 if (!ia) PetscFunctionReturn(0); 2551 ierr = PetscFree(*ia);CHKERRQ(ierr); 2552 ierr = PetscFree(*ja);CHKERRQ(ierr); 2553 PetscFunctionReturn(0); 2554 } 2555 2556 /* 2557 MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from 2558 MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output 2559 spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate() 2560 */ 2561 #undef __FUNCT__ 2562 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color" 2563 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2564 { 2565 Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data; 2566 PetscErrorCode ierr; 2567 PetscInt i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs; 2568 PetscInt nz = a->i[m],row,*jj,mr,col; 2569 PetscInt *cspidx; 2570 2571 PetscFunctionBegin; 2572 *nn = n; 2573 if (!ia) PetscFunctionReturn(0); 2574 2575 ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr); 2576 ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr); 2577 ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr); 2578 ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr); 2579 jj = a->j; 2580 for (i=0; i<nz; i++) { 2581 collengths[jj[i]]++; 2582 } 2583 cia[0] = oshift; 2584 for (i=0; i<n; i++) { 2585 cia[i+1] = cia[i] + collengths[i]; 2586 } 2587 ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr); 2588 jj = a->j; 2589 for (row=0; row<m; row++) { 2590 mr = a->i[row+1] - a->i[row]; 2591 for (i=0; i<mr; i++) { 2592 col = *jj++; 2593 cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */ 2594 cja[cia[col] + collengths[col]++ - oshift] = row + oshift; 2595 } 2596 } 2597 ierr = PetscFree(collengths);CHKERRQ(ierr); 2598 *ia = cia; *ja = cja; 2599 *spidx = cspidx; 2600 PetscFunctionReturn(0); 2601 } 2602 2603 #undef __FUNCT__ 2604 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color" 2605 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool *done) 2606 { 2607 PetscErrorCode ierr; 2608 2609 PetscFunctionBegin; 2610 ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr); 2611 ierr = PetscFree(*spidx);CHKERRQ(ierr); 2612 PetscFunctionReturn(0); 2613 } 2614 2615 #undef __FUNCT__ 2616 #define __FUNCT__ "MatShift_SeqBAIJ" 2617 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a) 2618 { 2619 PetscErrorCode ierr; 2620 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)Y->data; 2621 2622 PetscFunctionBegin; 2623 if (!Y->preallocated || !aij->nz) { 2624 ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr); 2625 } 2626 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 2627 PetscFunctionReturn(0); 2628 } 2629 2630 /* -------------------------------------------------------------------*/ 2631 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ, 2632 MatGetRow_SeqBAIJ, 2633 MatRestoreRow_SeqBAIJ, 2634 MatMult_SeqBAIJ_N, 2635 /* 4*/ MatMultAdd_SeqBAIJ_N, 2636 MatMultTranspose_SeqBAIJ, 2637 MatMultTransposeAdd_SeqBAIJ, 2638 0, 2639 0, 2640 0, 2641 /* 10*/ 0, 2642 MatLUFactor_SeqBAIJ, 2643 0, 2644 0, 2645 MatTranspose_SeqBAIJ, 2646 /* 15*/ MatGetInfo_SeqBAIJ, 2647 MatEqual_SeqBAIJ, 2648 MatGetDiagonal_SeqBAIJ, 2649 MatDiagonalScale_SeqBAIJ, 2650 MatNorm_SeqBAIJ, 2651 /* 20*/ 0, 2652 MatAssemblyEnd_SeqBAIJ, 2653 MatSetOption_SeqBAIJ, 2654 MatZeroEntries_SeqBAIJ, 2655 /* 24*/ MatZeroRows_SeqBAIJ, 2656 0, 2657 0, 2658 0, 2659 0, 2660 /* 29*/ MatSetUp_SeqBAIJ, 2661 0, 2662 0, 2663 0, 2664 0, 2665 /* 34*/ MatDuplicate_SeqBAIJ, 2666 0, 2667 0, 2668 MatILUFactor_SeqBAIJ, 2669 0, 2670 /* 39*/ MatAXPY_SeqBAIJ, 2671 MatGetSubMatrices_SeqBAIJ, 2672 MatIncreaseOverlap_SeqBAIJ, 2673 MatGetValues_SeqBAIJ, 2674 MatCopy_SeqBAIJ, 2675 /* 44*/ 0, 2676 MatScale_SeqBAIJ, 2677 MatShift_SeqBAIJ, 2678 0, 2679 MatZeroRowsColumns_SeqBAIJ, 2680 /* 49*/ 0, 2681 MatGetRowIJ_SeqBAIJ, 2682 MatRestoreRowIJ_SeqBAIJ, 2683 MatGetColumnIJ_SeqBAIJ, 2684 MatRestoreColumnIJ_SeqBAIJ, 2685 /* 54*/ MatFDColoringCreate_SeqXAIJ, 2686 0, 2687 0, 2688 0, 2689 MatSetValuesBlocked_SeqBAIJ, 2690 /* 59*/ MatGetSubMatrix_SeqBAIJ, 2691 MatDestroy_SeqBAIJ, 2692 MatView_SeqBAIJ, 2693 0, 2694 0, 2695 /* 64*/ 0, 2696 0, 2697 0, 2698 0, 2699 0, 2700 /* 69*/ MatGetRowMaxAbs_SeqBAIJ, 2701 0, 2702 MatConvert_Basic, 2703 0, 2704 0, 2705 /* 74*/ 0, 2706 MatFDColoringApply_BAIJ, 2707 0, 2708 0, 2709 0, 2710 /* 79*/ 0, 2711 0, 2712 0, 2713 0, 2714 MatLoad_SeqBAIJ, 2715 /* 84*/ 0, 2716 0, 2717 0, 2718 0, 2719 0, 2720 /* 89*/ 0, 2721 0, 2722 0, 2723 0, 2724 0, 2725 /* 94*/ 0, 2726 0, 2727 0, 2728 0, 2729 0, 2730 /* 99*/ 0, 2731 0, 2732 0, 2733 0, 2734 0, 2735 /*104*/ 0, 2736 MatRealPart_SeqBAIJ, 2737 MatImaginaryPart_SeqBAIJ, 2738 0, 2739 0, 2740 /*109*/ 0, 2741 0, 2742 0, 2743 0, 2744 MatMissingDiagonal_SeqBAIJ, 2745 /*114*/ 0, 2746 0, 2747 0, 2748 0, 2749 0, 2750 /*119*/ 0, 2751 0, 2752 MatMultHermitianTranspose_SeqBAIJ, 2753 MatMultHermitianTransposeAdd_SeqBAIJ, 2754 0, 2755 /*124*/ 0, 2756 0, 2757 MatInvertBlockDiagonal_SeqBAIJ, 2758 0, 2759 0, 2760 /*129*/ 0, 2761 0, 2762 0, 2763 0, 2764 0, 2765 /*134*/ 0, 2766 0, 2767 0, 2768 0, 2769 0, 2770 /*139*/ 0, 2771 0, 2772 0, 2773 MatFDColoringSetUp_SeqXAIJ, 2774 0, 2775 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ 2776 }; 2777 2778 #undef __FUNCT__ 2779 #define __FUNCT__ "MatStoreValues_SeqBAIJ" 2780 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat) 2781 { 2782 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2783 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2784 PetscErrorCode ierr; 2785 2786 PetscFunctionBegin; 2787 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2788 2789 /* allocate space for values if not already there */ 2790 if (!aij->saved_values) { 2791 ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr); 2792 ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr); 2793 } 2794 2795 /* copy values over */ 2796 ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2797 PetscFunctionReturn(0); 2798 } 2799 2800 #undef __FUNCT__ 2801 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ" 2802 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat) 2803 { 2804 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ*)mat->data; 2805 PetscErrorCode ierr; 2806 PetscInt nz = aij->i[aij->mbs]*aij->bs2; 2807 2808 PetscFunctionBegin; 2809 if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first"); 2810 if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first"); 2811 2812 /* copy values over */ 2813 ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr); 2814 PetscFunctionReturn(0); 2815 } 2816 2817 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*); 2818 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*); 2819 2820 #undef __FUNCT__ 2821 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ" 2822 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz) 2823 { 2824 Mat_SeqBAIJ *b; 2825 PetscErrorCode ierr; 2826 PetscInt i,mbs,nbs,bs2; 2827 PetscBool flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE; 2828 2829 PetscFunctionBegin; 2830 if (nz >= 0 || nnz) realalloc = PETSC_TRUE; 2831 if (nz == MAT_SKIP_ALLOCATION) { 2832 skipallocation = PETSC_TRUE; 2833 nz = 0; 2834 } 2835 2836 ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr); 2837 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2838 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2839 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2840 2841 B->preallocated = PETSC_TRUE; 2842 2843 mbs = B->rmap->n/bs; 2844 nbs = B->cmap->n/bs; 2845 bs2 = bs*bs; 2846 2847 if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs); 2848 2849 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2850 if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz); 2851 if (nnz) { 2852 for (i=0; i<mbs; i++) { 2853 if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]); 2854 if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs); 2855 } 2856 } 2857 2858 b = (Mat_SeqBAIJ*)B->data; 2859 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr); 2860 ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr); 2861 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2862 2863 if (!flg) { 2864 switch (bs) { 2865 case 1: 2866 B->ops->mult = MatMult_SeqBAIJ_1; 2867 B->ops->multadd = MatMultAdd_SeqBAIJ_1; 2868 break; 2869 case 2: 2870 B->ops->mult = MatMult_SeqBAIJ_2; 2871 B->ops->multadd = MatMultAdd_SeqBAIJ_2; 2872 break; 2873 case 3: 2874 B->ops->mult = MatMult_SeqBAIJ_3; 2875 B->ops->multadd = MatMultAdd_SeqBAIJ_3; 2876 break; 2877 case 4: 2878 B->ops->mult = MatMult_SeqBAIJ_4; 2879 B->ops->multadd = MatMultAdd_SeqBAIJ_4; 2880 break; 2881 case 5: 2882 B->ops->mult = MatMult_SeqBAIJ_5; 2883 B->ops->multadd = MatMultAdd_SeqBAIJ_5; 2884 break; 2885 case 6: 2886 B->ops->mult = MatMult_SeqBAIJ_6; 2887 B->ops->multadd = MatMultAdd_SeqBAIJ_6; 2888 break; 2889 case 7: 2890 B->ops->mult = MatMult_SeqBAIJ_7; 2891 B->ops->multadd = MatMultAdd_SeqBAIJ_7; 2892 break; 2893 case 15: 2894 B->ops->mult = MatMult_SeqBAIJ_15_ver1; 2895 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2896 break; 2897 default: 2898 B->ops->mult = MatMult_SeqBAIJ_N; 2899 B->ops->multadd = MatMultAdd_SeqBAIJ_N; 2900 break; 2901 } 2902 } 2903 B->ops->sor = MatSOR_SeqBAIJ; 2904 b->mbs = mbs; 2905 b->nbs = nbs; 2906 if (!skipallocation) { 2907 if (!b->imax) { 2908 ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr); 2909 ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 2910 2911 b->free_imax_ilen = PETSC_TRUE; 2912 } 2913 /* b->ilen will count nonzeros in each block row so far. */ 2914 for (i=0; i<mbs; i++) b->ilen[i] = 0; 2915 if (!nnz) { 2916 if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5; 2917 else if (nz < 0) nz = 1; 2918 for (i=0; i<mbs; i++) b->imax[i] = nz; 2919 nz = nz*mbs; 2920 } else { 2921 nz = 0; 2922 for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];} 2923 } 2924 2925 /* allocate the matrix space */ 2926 ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr); 2927 ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr); 2928 ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr); 2929 ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr); 2930 ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 2931 2932 b->singlemalloc = PETSC_TRUE; 2933 b->i[0] = 0; 2934 for (i=1; i<mbs+1; i++) { 2935 b->i[i] = b->i[i-1] + b->imax[i-1]; 2936 } 2937 b->free_a = PETSC_TRUE; 2938 b->free_ij = PETSC_TRUE; 2939 } else { 2940 b->free_a = PETSC_FALSE; 2941 b->free_ij = PETSC_FALSE; 2942 } 2943 2944 b->bs2 = bs2; 2945 b->mbs = mbs; 2946 b->nz = 0; 2947 b->maxnz = nz; 2948 B->info.nz_unneeded = (PetscReal)b->maxnz*bs2; 2949 if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);} 2950 PetscFunctionReturn(0); 2951 } 2952 2953 #undef __FUNCT__ 2954 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ" 2955 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[]) 2956 { 2957 PetscInt i,m,nz,nz_max=0,*nnz; 2958 PetscScalar *values=0; 2959 PetscBool roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented; 2960 PetscErrorCode ierr; 2961 2962 PetscFunctionBegin; 2963 if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs); 2964 ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr); 2965 ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr); 2966 ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr); 2967 ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr); 2968 ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr); 2969 m = B->rmap->n/bs; 2970 2971 if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); 2972 ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr); 2973 for (i=0; i<m; i++) { 2974 nz = ii[i+1]- ii[i]; 2975 if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); 2976 nz_max = PetscMax(nz_max, nz); 2977 nnz[i] = nz; 2978 } 2979 ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr); 2980 ierr = PetscFree(nnz);CHKERRQ(ierr); 2981 2982 values = (PetscScalar*)V; 2983 if (!values) { 2984 ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr); 2985 } 2986 for (i=0; i<m; i++) { 2987 PetscInt ncols = ii[i+1] - ii[i]; 2988 const PetscInt *icols = jj + ii[i]; 2989 const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0); 2990 if (!roworiented) { 2991 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr); 2992 } else { 2993 PetscInt j; 2994 for (j=0; j<ncols; j++) { 2995 const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0); 2996 ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr); 2997 } 2998 } 2999 } 3000 if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); } 3001 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3002 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3003 ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3004 PetscFunctionReturn(0); 3005 } 3006 3007 /*MC 3008 MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on 3009 block sparse compressed row format. 3010 3011 Options Database Keys: 3012 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions() 3013 3014 Level: beginner 3015 3016 .seealso: MatCreateSeqBAIJ() 3017 M*/ 3018 3019 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*); 3020 3021 #undef __FUNCT__ 3022 #define __FUNCT__ "MatCreate_SeqBAIJ" 3023 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B) 3024 { 3025 PetscErrorCode ierr; 3026 PetscMPIInt size; 3027 Mat_SeqBAIJ *b; 3028 3029 PetscFunctionBegin; 3030 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr); 3031 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1"); 3032 3033 ierr = PetscNewLog(B,&b);CHKERRQ(ierr); 3034 B->data = (void*)b; 3035 ierr = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); 3036 3037 b->row = 0; 3038 b->col = 0; 3039 b->icol = 0; 3040 b->reallocs = 0; 3041 b->saved_values = 0; 3042 3043 b->roworiented = PETSC_TRUE; 3044 b->nonew = 0; 3045 b->diag = 0; 3046 B->spptr = 0; 3047 B->info.nz_unneeded = (PetscReal)b->maxnz*b->bs2; 3048 b->keepnonzeropattern = PETSC_FALSE; 3049 3050 ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr); 3051 ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr); 3052 ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr); 3053 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr); 3054 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr); 3055 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr); 3056 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr); 3057 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr); 3058 ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr); 3059 ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr); 3060 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr); 3061 PetscFunctionReturn(0); 3062 } 3063 3064 #undef __FUNCT__ 3065 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ" 3066 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace) 3067 { 3068 Mat_SeqBAIJ *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data; 3069 PetscErrorCode ierr; 3070 PetscInt i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2; 3071 3072 PetscFunctionBegin; 3073 if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix"); 3074 3075 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3076 c->imax = a->imax; 3077 c->ilen = a->ilen; 3078 c->free_imax_ilen = PETSC_FALSE; 3079 } else { 3080 ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr); 3081 ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr); 3082 for (i=0; i<mbs; i++) { 3083 c->imax[i] = a->imax[i]; 3084 c->ilen[i] = a->ilen[i]; 3085 } 3086 c->free_imax_ilen = PETSC_TRUE; 3087 } 3088 3089 /* allocate the matrix space */ 3090 if (mallocmatspace) { 3091 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3092 ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr); 3093 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr); 3094 3095 c->i = a->i; 3096 c->j = a->j; 3097 c->singlemalloc = PETSC_FALSE; 3098 c->free_a = PETSC_TRUE; 3099 c->free_ij = PETSC_FALSE; 3100 c->parent = A; 3101 C->preallocated = PETSC_TRUE; 3102 C->assembled = PETSC_TRUE; 3103 3104 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 3105 ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3106 ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3107 } else { 3108 ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr); 3109 ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3110 3111 c->singlemalloc = PETSC_TRUE; 3112 c->free_a = PETSC_TRUE; 3113 c->free_ij = PETSC_TRUE; 3114 3115 ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3116 if (mbs > 0) { 3117 ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr); 3118 if (cpvalues == MAT_COPY_VALUES) { 3119 ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3120 } else { 3121 ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr); 3122 } 3123 } 3124 C->preallocated = PETSC_TRUE; 3125 C->assembled = PETSC_TRUE; 3126 } 3127 } 3128 3129 c->roworiented = a->roworiented; 3130 c->nonew = a->nonew; 3131 3132 ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr); 3133 ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr); 3134 3135 c->bs2 = a->bs2; 3136 c->mbs = a->mbs; 3137 c->nbs = a->nbs; 3138 3139 if (a->diag) { 3140 if (cpvalues == MAT_SHARE_NONZERO_PATTERN) { 3141 c->diag = a->diag; 3142 c->free_diag = PETSC_FALSE; 3143 } else { 3144 ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr); 3145 ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr); 3146 for (i=0; i<mbs; i++) c->diag[i] = a->diag[i]; 3147 c->free_diag = PETSC_TRUE; 3148 } 3149 } else c->diag = 0; 3150 3151 c->nz = a->nz; 3152 c->maxnz = a->nz; /* Since we allocate exactly the right amount */ 3153 c->solve_work = NULL; 3154 c->mult_work = NULL; 3155 c->sor_workt = NULL; 3156 c->sor_work = NULL; 3157 3158 c->compressedrow.use = a->compressedrow.use; 3159 c->compressedrow.nrows = a->compressedrow.nrows; 3160 if (a->compressedrow.use) { 3161 i = a->compressedrow.nrows; 3162 ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr); 3163 ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3164 ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr); 3165 ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr); 3166 } else { 3167 c->compressedrow.use = PETSC_FALSE; 3168 c->compressedrow.i = NULL; 3169 c->compressedrow.rindex = NULL; 3170 } 3171 C->nonzerostate = A->nonzerostate; 3172 3173 ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr); 3174 ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 3175 PetscFunctionReturn(0); 3176 } 3177 3178 #undef __FUNCT__ 3179 #define __FUNCT__ "MatDuplicate_SeqBAIJ" 3180 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B) 3181 { 3182 PetscErrorCode ierr; 3183 3184 PetscFunctionBegin; 3185 ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr); 3186 ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr); 3187 ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr); 3188 ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr); 3189 PetscFunctionReturn(0); 3190 } 3191 3192 #undef __FUNCT__ 3193 #define __FUNCT__ "MatLoad_SeqBAIJ" 3194 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer) 3195 { 3196 Mat_SeqBAIJ *a; 3197 PetscErrorCode ierr; 3198 PetscInt i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs; 3199 PetscInt *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount; 3200 PetscInt kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols; 3201 PetscInt *masked,nmask,tmp,bs2,ishift; 3202 PetscMPIInt size; 3203 int fd; 3204 PetscScalar *aa; 3205 MPI_Comm comm; 3206 3207 PetscFunctionBegin; 3208 /* force binary viewer to load .info file if it has not yet done so */ 3209 ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr); 3210 ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr); 3211 ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr); 3212 ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr); 3213 ierr = PetscOptionsEnd();CHKERRQ(ierr); 3214 if (bs < 0) bs = 1; 3215 bs2 = bs*bs; 3216 3217 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 3218 if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor"); 3219 ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr); 3220 ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr); 3221 if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object"); 3222 M = header[1]; N = header[2]; nz = header[3]; 3223 3224 if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ"); 3225 if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices"); 3226 3227 /* 3228 This code adds extra rows to make sure the number of rows is 3229 divisible by the blocksize 3230 */ 3231 mbs = M/bs; 3232 extra_rows = bs - M + bs*(mbs); 3233 if (extra_rows == bs) extra_rows = 0; 3234 else mbs++; 3235 if (extra_rows) { 3236 ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr); 3237 } 3238 3239 /* Set global sizes if not already set */ 3240 if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) { 3241 ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr); 3242 } else { /* Check if the matrix global sizes are correct */ 3243 ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr); 3244 if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */ 3245 ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr); 3246 } 3247 if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols); 3248 } 3249 3250 /* read in row lengths */ 3251 ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr); 3252 ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr); 3253 for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1; 3254 3255 /* read in column indices */ 3256 ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr); 3257 ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr); 3258 for (i=0; i<extra_rows; i++) jj[nz+i] = M+i; 3259 3260 /* loop over row lengths determining block row lengths */ 3261 ierr = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr); 3262 ierr = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr); 3263 ierr = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr); 3264 rowcount = 0; 3265 nzcount = 0; 3266 for (i=0; i<mbs; i++) { 3267 nmask = 0; 3268 for (j=0; j<bs; j++) { 3269 kmax = rowlengths[rowcount]; 3270 for (k=0; k<kmax; k++) { 3271 tmp = jj[nzcount++]/bs; 3272 if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;} 3273 } 3274 rowcount++; 3275 } 3276 browlengths[i] += nmask; 3277 /* zero out the mask elements we set */ 3278 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3279 } 3280 3281 /* Do preallocation */ 3282 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr); 3283 a = (Mat_SeqBAIJ*)newmat->data; 3284 3285 /* set matrix "i" values */ 3286 a->i[0] = 0; 3287 for (i=1; i<= mbs; i++) { 3288 a->i[i] = a->i[i-1] + browlengths[i-1]; 3289 a->ilen[i-1] = browlengths[i-1]; 3290 } 3291 a->nz = 0; 3292 for (i=0; i<mbs; i++) a->nz += browlengths[i]; 3293 3294 /* read in nonzero values */ 3295 ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr); 3296 ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr); 3297 for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0; 3298 3299 /* set "a" and "j" values into matrix */ 3300 nzcount = 0; jcount = 0; 3301 for (i=0; i<mbs; i++) { 3302 nzcountb = nzcount; 3303 nmask = 0; 3304 for (j=0; j<bs; j++) { 3305 kmax = rowlengths[i*bs+j]; 3306 for (k=0; k<kmax; k++) { 3307 tmp = jj[nzcount++]/bs; 3308 if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;} 3309 } 3310 } 3311 /* sort the masked values */ 3312 ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr); 3313 3314 /* set "j" values into matrix */ 3315 maskcount = 1; 3316 for (j=0; j<nmask; j++) { 3317 a->j[jcount++] = masked[j]; 3318 mask[masked[j]] = maskcount++; 3319 } 3320 /* set "a" values into matrix */ 3321 ishift = bs2*a->i[i]; 3322 for (j=0; j<bs; j++) { 3323 kmax = rowlengths[i*bs+j]; 3324 for (k=0; k<kmax; k++) { 3325 tmp = jj[nzcountb]/bs; 3326 block = mask[tmp] - 1; 3327 point = jj[nzcountb] - bs*tmp; 3328 idx = ishift + bs2*block + j + bs*point; 3329 a->a[idx] = (MatScalar)aa[nzcountb++]; 3330 } 3331 } 3332 /* zero out the mask elements we set */ 3333 for (j=0; j<nmask; j++) mask[masked[j]] = 0; 3334 } 3335 if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix"); 3336 3337 ierr = PetscFree(rowlengths);CHKERRQ(ierr); 3338 ierr = PetscFree(browlengths);CHKERRQ(ierr); 3339 ierr = PetscFree(aa);CHKERRQ(ierr); 3340 ierr = PetscFree(jj);CHKERRQ(ierr); 3341 ierr = PetscFree2(mask,masked);CHKERRQ(ierr); 3342 3343 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3344 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3345 PetscFunctionReturn(0); 3346 } 3347 3348 #undef __FUNCT__ 3349 #define __FUNCT__ "MatCreateSeqBAIJ" 3350 /*@C 3351 MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block 3352 compressed row) format. For good matrix assembly performance the 3353 user should preallocate the matrix storage by setting the parameter nz 3354 (or the array nnz). By setting these parameters accurately, performance 3355 during matrix assembly can be increased by more than a factor of 50. 3356 3357 Collective on MPI_Comm 3358 3359 Input Parameters: 3360 + comm - MPI communicator, set to PETSC_COMM_SELF 3361 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3362 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3363 . m - number of rows 3364 . n - number of columns 3365 . nz - number of nonzero blocks per block row (same for all rows) 3366 - nnz - array containing the number of nonzero blocks in the various block rows 3367 (possibly different for each block row) or NULL 3368 3369 Output Parameter: 3370 . A - the matrix 3371 3372 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 3373 MatXXXXSetPreallocation() paradgm instead of this routine directly. 3374 [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation] 3375 3376 Options Database Keys: 3377 . -mat_no_unroll - uses code that does not unroll the loops in the 3378 block calculations (much slower) 3379 . -mat_block_size - size of the blocks to use 3380 3381 Level: intermediate 3382 3383 Notes: 3384 The number of rows and columns must be divisible by blocksize. 3385 3386 If the nnz parameter is given then the nz parameter is ignored 3387 3388 A nonzero block is any block that as 1 or more nonzeros in it 3389 3390 The block AIJ format is fully compatible with standard Fortran 77 3391 storage. That is, the stored row and column indices can begin at 3392 either one (as in Fortran) or zero. See the users' manual for details. 3393 3394 Specify the preallocated storage with either nz or nnz (not both). 3395 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3396 allocation. See Users-Manual: ch_mat for details. 3397 matrices. 3398 3399 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ() 3400 @*/ 3401 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A) 3402 { 3403 PetscErrorCode ierr; 3404 3405 PetscFunctionBegin; 3406 ierr = MatCreate(comm,A);CHKERRQ(ierr); 3407 ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr); 3408 ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr); 3409 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr); 3410 PetscFunctionReturn(0); 3411 } 3412 3413 #undef __FUNCT__ 3414 #define __FUNCT__ "MatSeqBAIJSetPreallocation" 3415 /*@C 3416 MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros 3417 per row in the matrix. For good matrix assembly performance the 3418 user should preallocate the matrix storage by setting the parameter nz 3419 (or the array nnz). By setting these parameters accurately, performance 3420 during matrix assembly can be increased by more than a factor of 50. 3421 3422 Collective on MPI_Comm 3423 3424 Input Parameters: 3425 + B - the matrix 3426 . bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row 3427 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 3428 . nz - number of block nonzeros per block row (same for all rows) 3429 - nnz - array containing the number of block nonzeros in the various block rows 3430 (possibly different for each block row) or NULL 3431 3432 Options Database Keys: 3433 . -mat_no_unroll - uses code that does not unroll the loops in the 3434 block calculations (much slower) 3435 . -mat_block_size - size of the blocks to use 3436 3437 Level: intermediate 3438 3439 Notes: 3440 If the nnz parameter is given then the nz parameter is ignored 3441 3442 You can call MatGetInfo() to get information on how effective the preallocation was; 3443 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3444 You can also run with the option -info and look for messages with the string 3445 malloc in them to see if additional memory allocation was needed. 3446 3447 The block AIJ format is fully compatible with standard Fortran 77 3448 storage. That is, the stored row and column indices can begin at 3449 either one (as in Fortran) or zero. See the users' manual for details. 3450 3451 Specify the preallocated storage with either nz or nnz (not both). 3452 Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory 3453 allocation. See Users-Manual: ch_mat for details. 3454 3455 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo() 3456 @*/ 3457 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[]) 3458 { 3459 PetscErrorCode ierr; 3460 3461 PetscFunctionBegin; 3462 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3463 PetscValidType(B,1); 3464 PetscValidLogicalCollectiveInt(B,bs,2); 3465 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr); 3466 PetscFunctionReturn(0); 3467 } 3468 3469 #undef __FUNCT__ 3470 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR" 3471 /*@C 3472 MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format 3473 (the default sequential PETSc format). 3474 3475 Collective on MPI_Comm 3476 3477 Input Parameters: 3478 + B - the matrix 3479 . i - the indices into j for the start of each local row (starts with zero) 3480 . j - the column indices for each local row (starts with zero) these must be sorted for each row 3481 - v - optional values in the matrix 3482 3483 Level: developer 3484 3485 Notes: 3486 The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED. For example, C programs 3487 may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is 3488 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 3489 MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 3490 block column and the second index is over columns within a block. 3491 3492 .keywords: matrix, aij, compressed row, sparse 3493 3494 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ 3495 @*/ 3496 PetscErrorCode MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[]) 3497 { 3498 PetscErrorCode ierr; 3499 3500 PetscFunctionBegin; 3501 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 3502 PetscValidType(B,1); 3503 PetscValidLogicalCollectiveInt(B,bs,2); 3504 ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr); 3505 PetscFunctionReturn(0); 3506 } 3507 3508 3509 #undef __FUNCT__ 3510 #define __FUNCT__ "MatCreateSeqBAIJWithArrays" 3511 /*@ 3512 MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user. 3513 3514 Collective on MPI_Comm 3515 3516 Input Parameters: 3517 + comm - must be an MPI communicator of size 1 3518 . bs - size of block 3519 . m - number of rows 3520 . n - number of columns 3521 . i - row indices 3522 . j - column indices 3523 - a - matrix values 3524 3525 Output Parameter: 3526 . mat - the matrix 3527 3528 Level: advanced 3529 3530 Notes: 3531 The i, j, and a arrays are not copied by this routine, the user must free these arrays 3532 once the matrix is destroyed 3533 3534 You cannot set new nonzero locations into this matrix, that will generate an error. 3535 3536 The i and j indices are 0 based 3537 3538 When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this). 3539 3540 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3541 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3542 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3543 with column-major ordering within blocks. 3544 3545 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ() 3546 3547 @*/ 3548 PetscErrorCode MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat) 3549 { 3550 PetscErrorCode ierr; 3551 PetscInt ii; 3552 Mat_SeqBAIJ *baij; 3553 3554 PetscFunctionBegin; 3555 if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs); 3556 if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0"); 3557 3558 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 3559 ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr); 3560 ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr); 3561 ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr); 3562 baij = (Mat_SeqBAIJ*)(*mat)->data; 3563 ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr); 3564 ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr); 3565 3566 baij->i = i; 3567 baij->j = j; 3568 baij->a = a; 3569 3570 baij->singlemalloc = PETSC_FALSE; 3571 baij->nonew = -1; /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/ 3572 baij->free_a = PETSC_FALSE; 3573 baij->free_ij = PETSC_FALSE; 3574 3575 for (ii=0; ii<m; ii++) { 3576 baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii]; 3577 #if defined(PETSC_USE_DEBUG) 3578 if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]); 3579 #endif 3580 } 3581 #if defined(PETSC_USE_DEBUG) 3582 for (ii=0; ii<baij->i[m]; ii++) { 3583 if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]); 3584 if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]); 3585 } 3586 #endif 3587 3588 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3589 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3590 PetscFunctionReturn(0); 3591 } 3592 3593 #undef __FUNCT__ 3594 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ" 3595 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 3596 { 3597 PetscErrorCode ierr; 3598 3599 PetscFunctionBegin; 3600 ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr); 3601 PetscFunctionReturn(0); 3602 } 3603