1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 #if defined(PETSC_USE_64BIT_INDICES) 15 fftw_iodim64 *iodims; 16 #else 17 fftw_iodim *iodims; 18 #endif 19 PetscInt partial_dim; 20 fftw_plan p_forward,p_backward; 21 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 22 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 23 executed for the arrays with which the plan was created */ 24 } Mat_FFTW; 25 26 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 27 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 28 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 29 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 30 extern PetscErrorCode MatDestroy_FFTW(Mat); 31 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 32 extern PetscErrorCode MatCreateVecsFFTW_FFTW(Mat,Vec*,Vec*,Vec*); 33 34 /* MatMult_SeqFFTW performs forward DFT in parallel 35 Input parameter: 36 A - the matrix 37 x - the vector on which FDFT will be performed 38 39 Output parameter: 40 y - vector that stores result of FDFT 41 */ 42 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 43 { 44 PetscErrorCode ierr; 45 Mat_FFT *fft = (Mat_FFT*)A->data; 46 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 47 const PetscScalar *x_array; 48 PetscScalar *y_array; 49 #if defined(PETSC_USE_COMPLEX) 50 #if defined(PETSC_USE_64BIT_INDICES) 51 fftw_iodim64 *iodims; 52 #else 53 fftw_iodim *iodims; 54 #endif 55 PetscInt i; 56 #endif 57 PetscInt ndim = fft->ndim,*dim = fft->dim; 58 59 PetscFunctionBegin; 60 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 61 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 62 if (!fftw->p_forward) { /* create a plan, then excute it */ 63 switch (ndim) { 64 case 1: 65 #if defined(PETSC_USE_COMPLEX) 66 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 67 #else 68 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 69 #endif 70 break; 71 case 2: 72 #if defined(PETSC_USE_COMPLEX) 73 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 74 #else 75 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 76 #endif 77 break; 78 case 3: 79 #if defined(PETSC_USE_COMPLEX) 80 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 81 #else 82 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 83 #endif 84 break; 85 default: 86 #if defined(PETSC_USE_COMPLEX) 87 iodims = fftw->iodims; 88 #if defined(PETSC_USE_64BIT_INDICES) 89 if (ndim) { 90 iodims[ndim-1].n = (ptrdiff_t)dim[ndim-1]; 91 iodims[ndim-1].is = iodims[ndim-1].os = 1; 92 for (i=ndim-2; i>=0; --i) { 93 iodims[i].n = (ptrdiff_t)dim[i]; 94 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 95 } 96 } 97 fftw->p_forward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 98 #else 99 if (ndim) { 100 iodims[ndim-1].n = (int)dim[ndim-1]; 101 iodims[ndim-1].is = iodims[ndim-1].os = 1; 102 for (i=ndim-2; i>=0; --i) { 103 iodims[i].n = (int)dim[i]; 104 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 105 } 106 } 107 fftw->p_forward = fftw_plan_guru_dft((int)ndim,(fftw_iodim*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 108 #endif 109 110 #else 111 fftw->p_forward = fftw_plan_dft_r2c(ndim,(int*)dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 112 #endif 113 break; 114 } 115 fftw->finarray = (PetscScalar *) x_array; 116 fftw->foutarray = y_array; 117 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 118 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 119 fftw_execute(fftw->p_forward); 120 } else { /* use existing plan */ 121 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 122 #if defined(PETSC_USE_COMPLEX) 123 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 124 #else 125 fftw_execute_dft_r2c(fftw->p_forward,(double*)x_array,(fftw_complex*)y_array); 126 #endif 127 } else { 128 fftw_execute(fftw->p_forward); 129 } 130 } 131 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 132 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 133 PetscFunctionReturn(0); 134 } 135 136 /* MatMultTranspose_SeqFFTW performs serial backward DFT 137 Input parameter: 138 A - the matrix 139 x - the vector on which BDFT will be performed 140 141 Output parameter: 142 y - vector that stores result of BDFT 143 */ 144 145 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 146 { 147 PetscErrorCode ierr; 148 Mat_FFT *fft = (Mat_FFT*)A->data; 149 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 150 const PetscScalar *x_array; 151 PetscScalar *y_array; 152 PetscInt ndim=fft->ndim,*dim=fft->dim; 153 #if defined(PETSC_USE_COMPLEX) 154 #if defined(PETSC_USE_64BIT_INDICES) 155 fftw_iodim64 *iodims=fftw->iodims; 156 #else 157 fftw_iodim *iodims=fftw->iodims; 158 #endif 159 #endif 160 161 PetscFunctionBegin; 162 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 163 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 164 if (!fftw->p_backward) { /* create a plan, then excute it */ 165 switch (ndim) { 166 case 1: 167 #if defined(PETSC_USE_COMPLEX) 168 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 169 #else 170 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 171 #endif 172 break; 173 case 2: 174 #if defined(PETSC_USE_COMPLEX) 175 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 176 #else 177 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 178 #endif 179 break; 180 case 3: 181 #if defined(PETSC_USE_COMPLEX) 182 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 183 #else 184 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 185 #endif 186 break; 187 default: 188 #if defined(PETSC_USE_COMPLEX) 189 #if defined(PETSC_USE_64BIT_INDICES) 190 fftw->p_backward = fftw_plan_guru64_dft((int)ndim,(fftw_iodim64*)iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 191 #else 192 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 193 #endif 194 #else 195 fftw->p_backward= fftw_plan_dft_c2r((int)ndim,(int*)dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 196 #endif 197 break; 198 } 199 fftw->binarray = (PetscScalar *) x_array; 200 fftw->boutarray = y_array; 201 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 202 } else { /* use existing plan */ 203 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 204 #if defined(PETSC_USE_COMPLEX) 205 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 206 #else 207 fftw_execute_dft_c2r(fftw->p_backward,(fftw_complex*)x_array,(double*)y_array); 208 #endif 209 } else { 210 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 211 } 212 } 213 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 214 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 215 PetscFunctionReturn(0); 216 } 217 218 /* MatMult_MPIFFTW performs forward DFT in parallel 219 Input parameter: 220 A - the matrix 221 x - the vector on which FDFT will be performed 222 223 Output parameter: 224 y - vector that stores result of FDFT 225 */ 226 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 227 { 228 PetscErrorCode ierr; 229 Mat_FFT *fft = (Mat_FFT*)A->data; 230 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 231 const PetscScalar *x_array; 232 PetscScalar *y_array; 233 PetscInt ndim=fft->ndim,*dim=fft->dim; 234 MPI_Comm comm; 235 236 PetscFunctionBegin; 237 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 238 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 239 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 240 if (!fftw->p_forward) { /* create a plan, then excute it */ 241 switch (ndim) { 242 case 1: 243 #if defined(PETSC_USE_COMPLEX) 244 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 245 #else 246 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 247 #endif 248 break; 249 case 2: 250 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 251 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 252 #else 253 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 254 #endif 255 break; 256 case 3: 257 #if defined(PETSC_USE_COMPLEX) 258 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 259 #else 260 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 261 #endif 262 break; 263 default: 264 #if defined(PETSC_USE_COMPLEX) 265 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 266 #else 267 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 268 #endif 269 break; 270 } 271 fftw->finarray = (PetscScalar *) x_array; 272 fftw->foutarray = y_array; 273 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 274 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 275 fftw_execute(fftw->p_forward); 276 } else { /* use existing plan */ 277 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 278 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 279 } else { 280 fftw_execute(fftw->p_forward); 281 } 282 } 283 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 284 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 285 PetscFunctionReturn(0); 286 } 287 288 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 289 Input parameter: 290 A - the matrix 291 x - the vector on which BDFT will be performed 292 293 Output parameter: 294 y - vector that stores result of BDFT 295 */ 296 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 297 { 298 PetscErrorCode ierr; 299 Mat_FFT *fft = (Mat_FFT*)A->data; 300 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 301 const PetscScalar *x_array; 302 PetscScalar *y_array; 303 PetscInt ndim=fft->ndim,*dim=fft->dim; 304 MPI_Comm comm; 305 306 PetscFunctionBegin; 307 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 308 ierr = VecGetArrayRead(x,&x_array);CHKERRQ(ierr); 309 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 310 if (!fftw->p_backward) { /* create a plan, then excute it */ 311 switch (ndim) { 312 case 1: 313 #if defined(PETSC_USE_COMPLEX) 314 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 315 #else 316 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 317 #endif 318 break; 319 case 2: 320 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 321 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 322 #else 323 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 324 #endif 325 break; 326 case 3: 327 #if defined(PETSC_USE_COMPLEX) 328 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 329 #else 330 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 331 #endif 332 break; 333 default: 334 #if defined(PETSC_USE_COMPLEX) 335 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 336 #else 337 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 338 #endif 339 break; 340 } 341 fftw->binarray = (PetscScalar *) x_array; 342 fftw->boutarray = y_array; 343 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 344 } else { /* use existing plan */ 345 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 346 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 347 } else { 348 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 349 } 350 } 351 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 352 ierr = VecRestoreArrayRead(x,&x_array);CHKERRQ(ierr); 353 PetscFunctionReturn(0); 354 } 355 356 PetscErrorCode MatDestroy_FFTW(Mat A) 357 { 358 Mat_FFT *fft = (Mat_FFT*)A->data; 359 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 360 PetscErrorCode ierr; 361 362 PetscFunctionBegin; 363 fftw_destroy_plan(fftw->p_forward); 364 fftw_destroy_plan(fftw->p_backward); 365 if (fftw->iodims) { 366 free(fftw->iodims); 367 } 368 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 369 ierr = PetscFree(fft->data);CHKERRQ(ierr); 370 fftw_mpi_cleanup(); 371 PetscFunctionReturn(0); 372 } 373 374 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 375 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 376 { 377 PetscErrorCode ierr; 378 PetscScalar *array; 379 380 PetscFunctionBegin; 381 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 382 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 383 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 384 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 385 PetscFunctionReturn(0); 386 } 387 388 389 static PetscErrorCode VecDuplicate_FFTW_fin(Vec fin,Vec *fin_new) 390 { 391 PetscErrorCode ierr; 392 Mat A; 393 394 PetscFunctionBegin; 395 ierr = PetscObjectQuery((PetscObject)fin,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 396 ierr = MatCreateVecsFFTW_FFTW(A,fin_new,NULL,NULL);CHKERRQ(ierr); 397 PetscFunctionReturn(0); 398 } 399 400 static PetscErrorCode VecDuplicate_FFTW_fout(Vec fout,Vec *fout_new) 401 { 402 PetscErrorCode ierr; 403 Mat A; 404 405 PetscFunctionBegin; 406 ierr = PetscObjectQuery((PetscObject)fout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 407 ierr = MatCreateVecsFFTW_FFTW(A,NULL,fout_new,NULL);CHKERRQ(ierr); 408 PetscFunctionReturn(0); 409 } 410 411 static PetscErrorCode VecDuplicate_FFTW_bout(Vec bout, Vec *bout_new) 412 { 413 PetscErrorCode ierr; 414 Mat A; 415 416 PetscFunctionBegin; 417 ierr = PetscObjectQuery((PetscObject)bout,"FFTmatrix",(PetscObject*)&A);CHKERRQ(ierr); 418 ierr = MatCreateVecsFFTW_FFTW(A,NULL,NULL,bout_new);CHKERRQ(ierr); 419 PetscFunctionReturn(0); 420 } 421 422 423 /*@ 424 MatCreateVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 425 parallel layout determined by FFTW 426 427 Collective on Mat 428 429 Input Parameter: 430 . A - the matrix 431 432 Output Parameter: 433 + x - (optional) input vector of forward FFTW 434 - y - (optional) output vector of forward FFTW 435 - z - (optional) output vector of backward FFTW 436 437 Level: advanced 438 439 Note: The parallel layout of output of forward FFTW is always same as the input 440 of the backward FFTW. But parallel layout of the input vector of forward 441 FFTW might not be same as the output of backward FFTW. 442 Also note that we need to provide enough space while doing parallel real transform. 443 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 444 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 445 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 446 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 447 zeros if it is odd only one zero is needed. 448 Lastly one needs some scratch space at the end of data set in each process. alloc_local 449 figures out how much space is needed, i.e. it figures out the data+scratch space for 450 each processor and returns that. 451 452 .seealso: MatCreateFFT() 453 @*/ 454 PetscErrorCode MatCreateVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 455 { 456 PetscErrorCode ierr; 457 458 PetscFunctionBegin; 459 ierr = PetscUseMethod(A,"MatCreateVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 460 PetscFunctionReturn(0); 461 } 462 463 PetscErrorCode MatCreateVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 464 { 465 PetscErrorCode ierr; 466 PetscMPIInt size,rank; 467 MPI_Comm comm; 468 Mat_FFT *fft = (Mat_FFT*)A->data; 469 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 470 PetscInt N = fft->N; 471 PetscInt ndim = fft->ndim,*dim=fft->dim,n=fft->n; 472 473 PetscFunctionBegin; 474 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 475 PetscValidType(A,1); 476 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 477 478 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 479 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 480 if (size == 1) { /* sequential case */ 481 #if defined(PETSC_USE_COMPLEX) 482 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 483 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 484 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 485 #else 486 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 487 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 488 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 489 #endif 490 } else { /* parallel cases */ 491 ptrdiff_t alloc_local,local_n0,local_0_start; 492 ptrdiff_t local_n1; 493 fftw_complex *data_fout; 494 ptrdiff_t local_1_start; 495 #if defined(PETSC_USE_COMPLEX) 496 fftw_complex *data_fin,*data_bout; 497 #else 498 double *data_finr,*data_boutr; 499 PetscInt n1,N1; 500 ptrdiff_t temp; 501 #endif 502 503 switch (ndim) { 504 case 1: 505 #if !defined(PETSC_USE_COMPLEX) 506 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 507 #else 508 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 509 if (fin) { 510 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 511 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 512 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 513 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 514 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 515 } 516 if (fout) { 517 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 518 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 519 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 520 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 521 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 522 } 523 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 524 if (bout) { 525 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 526 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 527 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 528 (*bout)->ops->duplicate = VecDuplicate_FFTW_fout; 529 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 530 } 531 break; 532 #endif 533 case 2: 534 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 535 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 536 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 537 if (fin) { 538 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 539 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 540 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 541 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 542 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 543 } 544 if (fout) { 545 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 546 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 547 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 548 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 549 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 550 } 551 if (bout) { 552 data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 553 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 554 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 555 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 556 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 557 } 558 #else 559 /* Get local size */ 560 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 561 if (fin) { 562 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 563 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 564 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 565 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 566 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 567 } 568 if (fout) { 569 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 570 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 571 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 572 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 573 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 574 } 575 if (bout) { 576 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 577 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 578 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 579 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 580 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 581 } 582 #endif 583 break; 584 case 3: 585 #if !defined(PETSC_USE_COMPLEX) 586 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 587 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 588 if (fin) { 589 data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2); 590 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 591 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 592 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 593 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 594 } 595 if (fout) { 596 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 597 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 598 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 599 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 600 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 601 } 602 if (bout) { 603 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 604 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 605 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 606 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 607 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 608 } 609 #else 610 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 611 if (fin) { 612 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 613 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 614 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 615 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 616 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 617 } 618 if (fout) { 619 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 620 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 621 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 622 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 623 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 624 } 625 if (bout) { 626 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 627 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 628 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 629 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 630 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 631 } 632 #endif 633 break; 634 default: 635 #if !defined(PETSC_USE_COMPLEX) 636 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 637 638 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 639 640 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 641 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 642 643 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 644 645 if (fin) { 646 data_finr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 647 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 648 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 649 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 650 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 651 } 652 if (fout) { 653 data_fout=(fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 654 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 655 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 656 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 657 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 658 } 659 if (bout) { 660 data_boutr=(double*)fftw_malloc(sizeof(double)*alloc_local*2); 661 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 662 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 663 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 664 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 665 } 666 #else 667 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 668 if (fin) { 669 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 670 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 671 ierr = PetscObjectCompose((PetscObject)*fin,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 672 (*fin)->ops->duplicate = VecDuplicate_FFTW_fin; 673 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 674 } 675 if (fout) { 676 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 677 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 678 ierr = PetscObjectCompose((PetscObject)*fout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 679 (*fout)->ops->duplicate = VecDuplicate_FFTW_fout; 680 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 681 } 682 if (bout) { 683 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 684 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 685 ierr = PetscObjectCompose((PetscObject)*bout,"FFTmatrix",(PetscObject)A);CHKERRQ(ierr); 686 (*bout)->ops->duplicate = VecDuplicate_FFTW_bout; 687 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 688 } 689 #endif 690 break; 691 } 692 /* fftw vectors have their data array allocated by fftw_malloc, such that v->array=xxx but 693 v->array_allocated=NULL. A regular replacearray call won't free the memory and only causes 694 memory leaks. We void these pointers here to avoid acident uses. 695 */ 696 if (fin) (*fin)->ops->replacearray = NULL; 697 if (fout) (*fout)->ops->replacearray = NULL; 698 if (bout) (*bout)->ops->replacearray = NULL; 699 } 700 PetscFunctionReturn(0); 701 } 702 703 /*@ 704 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 705 706 Collective on Mat 707 708 Input Parameters: 709 + A - FFTW matrix 710 - x - the PETSc vector 711 712 Output Parameters: 713 . y - the FFTW vector 714 715 Options Database Keys: 716 . -mat_fftw_plannerflags - set FFTW planner flags 717 718 Level: intermediate 719 720 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 721 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 722 zeros. This routine does that job by scattering operation. 723 724 .seealso: VecScatterFFTWToPetsc() 725 @*/ 726 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 727 { 728 PetscErrorCode ierr; 729 730 PetscFunctionBegin; 731 ierr = PetscUseMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 732 PetscFunctionReturn(0); 733 } 734 735 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 736 { 737 PetscErrorCode ierr; 738 MPI_Comm comm; 739 Mat_FFT *fft = (Mat_FFT*)A->data; 740 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 741 PetscInt N =fft->N; 742 PetscInt ndim =fft->ndim,*dim=fft->dim; 743 PetscInt low; 744 PetscMPIInt rank,size; 745 PetscInt vsize,vsize1; 746 ptrdiff_t local_n0,local_0_start; 747 ptrdiff_t local_n1,local_1_start; 748 VecScatter vecscat; 749 IS list1,list2; 750 #if !defined(PETSC_USE_COMPLEX) 751 PetscInt i,j,k,partial_dim; 752 PetscInt *indx1, *indx2, tempindx, tempindx1; 753 PetscInt NM; 754 ptrdiff_t temp; 755 #endif 756 757 PetscFunctionBegin; 758 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 759 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 760 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 761 ierr = VecGetOwnershipRange(y,&low,NULL); 762 763 if (size==1) { 764 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 765 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 766 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 767 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 768 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 769 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 770 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 771 ierr = ISDestroy(&list1);CHKERRQ(ierr); 772 } else { 773 switch (ndim) { 774 case 1: 775 #if defined(PETSC_USE_COMPLEX) 776 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 777 778 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 779 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 780 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 781 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 782 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 783 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 784 ierr = ISDestroy(&list1);CHKERRQ(ierr); 785 ierr = ISDestroy(&list2);CHKERRQ(ierr); 786 #else 787 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 788 #endif 789 break; 790 case 2: 791 #if defined(PETSC_USE_COMPLEX) 792 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 793 794 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 795 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 796 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 797 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 798 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 799 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 800 ierr = ISDestroy(&list1);CHKERRQ(ierr); 801 ierr = ISDestroy(&list2);CHKERRQ(ierr); 802 #else 803 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 804 805 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 806 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 807 808 if (dim[1]%2==0) { 809 NM = dim[1]+2; 810 } else { 811 NM = dim[1]+1; 812 } 813 for (i=0; i<local_n0; i++) { 814 for (j=0; j<dim[1]; j++) { 815 tempindx = i*dim[1] + j; 816 tempindx1 = i*NM + j; 817 818 indx1[tempindx]=local_0_start*dim[1]+tempindx; 819 indx2[tempindx]=low+tempindx1; 820 } 821 } 822 823 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 824 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 825 826 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 827 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 828 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 829 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 830 ierr = ISDestroy(&list1);CHKERRQ(ierr); 831 ierr = ISDestroy(&list2);CHKERRQ(ierr); 832 ierr = PetscFree(indx1);CHKERRQ(ierr); 833 ierr = PetscFree(indx2);CHKERRQ(ierr); 834 #endif 835 break; 836 837 case 3: 838 #if defined(PETSC_USE_COMPLEX) 839 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 840 841 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 842 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 843 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 844 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 845 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 846 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 847 ierr = ISDestroy(&list1);CHKERRQ(ierr); 848 ierr = ISDestroy(&list2);CHKERRQ(ierr); 849 #else 850 /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 851 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 3D real transform"); 852 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 853 854 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 855 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 856 857 if (dim[2]%2==0) NM = dim[2]+2; 858 else NM = dim[2]+1; 859 860 for (i=0; i<local_n0; i++) { 861 for (j=0; j<dim[1]; j++) { 862 for (k=0; k<dim[2]; k++) { 863 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 864 tempindx1 = i*dim[1]*NM + j*NM + k; 865 866 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 867 indx2[tempindx]=low+tempindx1; 868 } 869 } 870 } 871 872 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 873 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 874 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 875 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 876 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 878 ierr = ISDestroy(&list1);CHKERRQ(ierr); 879 ierr = ISDestroy(&list2);CHKERRQ(ierr); 880 ierr = PetscFree(indx1);CHKERRQ(ierr); 881 ierr = PetscFree(indx2);CHKERRQ(ierr); 882 #endif 883 break; 884 885 default: 886 #if defined(PETSC_USE_COMPLEX) 887 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 888 889 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 890 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 891 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 892 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 894 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 895 ierr = ISDestroy(&list1);CHKERRQ(ierr); 896 ierr = ISDestroy(&list2);CHKERRQ(ierr); 897 #else 898 /* buggy, needs to be fixed. See src/mat/tests/ex158.c */ 899 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel DIM>3 real transform"); 900 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 901 902 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 903 904 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 905 906 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 907 908 partial_dim = fftw->partial_dim; 909 910 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 911 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 912 913 if (dim[ndim-1]%2==0) NM = 2; 914 else NM = 1; 915 916 j = low; 917 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 918 indx1[i] = local_0_start*partial_dim + i; 919 indx2[i] = j; 920 if (k%dim[ndim-1]==0) j+=NM; 921 j++; 922 } 923 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 924 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 925 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 926 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 927 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 928 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 929 ierr = ISDestroy(&list1);CHKERRQ(ierr); 930 ierr = ISDestroy(&list2);CHKERRQ(ierr); 931 ierr = PetscFree(indx1);CHKERRQ(ierr); 932 ierr = PetscFree(indx2);CHKERRQ(ierr); 933 #endif 934 break; 935 } 936 } 937 PetscFunctionReturn(0); 938 } 939 940 /*@ 941 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 942 943 Collective on Mat 944 945 Input Parameters: 946 + A - FFTW matrix 947 - x - FFTW vector 948 949 Output Parameters: 950 . y - PETSc vector 951 952 Level: intermediate 953 954 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 955 VecScatterFFTWToPetsc removes those extra zeros. 956 957 .seealso: VecScatterPetscToFFTW() 958 @*/ 959 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 960 { 961 PetscErrorCode ierr; 962 963 PetscFunctionBegin; 964 ierr = PetscUseMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 965 PetscFunctionReturn(0); 966 } 967 968 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 969 { 970 PetscErrorCode ierr; 971 MPI_Comm comm; 972 Mat_FFT *fft = (Mat_FFT*)A->data; 973 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 974 PetscInt N = fft->N; 975 PetscInt ndim = fft->ndim,*dim=fft->dim; 976 PetscInt low; 977 PetscMPIInt rank,size; 978 ptrdiff_t local_n0,local_0_start; 979 ptrdiff_t local_n1,local_1_start; 980 VecScatter vecscat; 981 IS list1,list2; 982 #if !defined(PETSC_USE_COMPLEX) 983 PetscInt i,j,k,partial_dim; 984 PetscInt *indx1, *indx2, tempindx, tempindx1; 985 PetscInt NM; 986 ptrdiff_t temp; 987 #endif 988 989 PetscFunctionBegin; 990 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 991 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 992 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 993 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 994 995 if (size==1) { 996 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 997 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 998 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 999 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1000 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1001 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1002 1003 } else { 1004 switch (ndim) { 1005 case 1: 1006 #if defined(PETSC_USE_COMPLEX) 1007 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1008 1009 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 1010 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 1011 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1012 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1013 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1014 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1015 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1016 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1017 #else 1018 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1019 #endif 1020 break; 1021 case 2: 1022 #if defined(PETSC_USE_COMPLEX) 1023 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1024 1025 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1026 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 1027 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1028 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1029 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1030 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1031 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1032 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1033 #else 1034 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1035 1036 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 1037 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 1038 1039 if (dim[1]%2==0) NM = dim[1]+2; 1040 else NM = dim[1]+1; 1041 1042 for (i=0; i<local_n0; i++) { 1043 for (j=0; j<dim[1]; j++) { 1044 tempindx = i*dim[1] + j; 1045 tempindx1 = i*NM + j; 1046 1047 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1048 indx2[tempindx]=low+tempindx1; 1049 } 1050 } 1051 1052 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1053 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1054 1055 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1056 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1057 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1058 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1059 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1060 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1061 ierr = PetscFree(indx1);CHKERRQ(ierr); 1062 ierr = PetscFree(indx2);CHKERRQ(ierr); 1063 #endif 1064 break; 1065 case 3: 1066 #if defined(PETSC_USE_COMPLEX) 1067 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1068 1069 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1070 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 1071 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1072 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1073 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1074 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1075 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1076 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1077 #else 1078 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1079 1080 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1081 ierr = PetscMalloc1(((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1082 1083 if (dim[2]%2==0) NM = dim[2]+2; 1084 else NM = dim[2]+1; 1085 1086 for (i=0; i<local_n0; i++) { 1087 for (j=0; j<dim[1]; j++) { 1088 for (k=0; k<dim[2]; k++) { 1089 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1090 tempindx1 = i*dim[1]*NM + j*NM + k; 1091 1092 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1093 indx2[tempindx]=low+tempindx1; 1094 } 1095 } 1096 } 1097 1098 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1099 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1100 1101 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1102 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1103 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1104 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1105 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1106 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1107 ierr = PetscFree(indx1);CHKERRQ(ierr); 1108 ierr = PetscFree(indx2);CHKERRQ(ierr); 1109 #endif 1110 break; 1111 default: 1112 #if defined(PETSC_USE_COMPLEX) 1113 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1114 1115 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1116 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1117 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1118 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1119 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1120 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1121 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1122 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1123 #else 1124 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1125 1126 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1127 1128 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1129 1130 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1131 1132 partial_dim = fftw->partial_dim; 1133 1134 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1135 ierr = PetscMalloc1(((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1136 1137 if (dim[ndim-1]%2==0) NM = 2; 1138 else NM = 1; 1139 1140 j = low; 1141 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1142 indx1[i] = local_0_start*partial_dim + i; 1143 indx2[i] = j; 1144 if (k%dim[ndim-1]==0) j+=NM; 1145 j++; 1146 } 1147 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1148 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1149 1150 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1151 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1152 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1153 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1154 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1155 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1156 ierr = PetscFree(indx1);CHKERRQ(ierr); 1157 ierr = PetscFree(indx2);CHKERRQ(ierr); 1158 #endif 1159 break; 1160 } 1161 } 1162 PetscFunctionReturn(0); 1163 } 1164 1165 /* 1166 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1167 1168 Options Database Keys: 1169 + -mat_fftw_plannerflags - set FFTW planner flags 1170 1171 Level: intermediate 1172 1173 */ 1174 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1175 { 1176 PetscErrorCode ierr; 1177 MPI_Comm comm; 1178 Mat_FFT *fft=(Mat_FFT*)A->data; 1179 Mat_FFTW *fftw; 1180 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1181 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1182 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1183 PetscBool flg; 1184 PetscInt p_flag,partial_dim=1,ctr; 1185 PetscMPIInt size,rank; 1186 ptrdiff_t *pdim; 1187 ptrdiff_t local_n1,local_1_start; 1188 #if !defined(PETSC_USE_COMPLEX) 1189 ptrdiff_t temp; 1190 PetscInt N1,tot_dim; 1191 #else 1192 PetscInt n1; 1193 #endif 1194 1195 PetscFunctionBegin; 1196 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1197 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1198 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1199 1200 fftw_mpi_init(); 1201 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1202 pdim[0] = dim[0]; 1203 #if !defined(PETSC_USE_COMPLEX) 1204 tot_dim = 2*dim[0]; 1205 #endif 1206 for (ctr=1; ctr<ndim; ctr++) { 1207 partial_dim *= dim[ctr]; 1208 pdim[ctr] = dim[ctr]; 1209 #if !defined(PETSC_USE_COMPLEX) 1210 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1211 else tot_dim *= dim[ctr]; 1212 #endif 1213 } 1214 1215 if (size == 1) { 1216 #if defined(PETSC_USE_COMPLEX) 1217 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1218 n = N; 1219 #else 1220 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1221 n = tot_dim; 1222 #endif 1223 1224 } else { 1225 ptrdiff_t local_n0,local_0_start; 1226 switch (ndim) { 1227 case 1: 1228 #if !defined(PETSC_USE_COMPLEX) 1229 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1230 #else 1231 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1232 1233 n = (PetscInt)local_n0; 1234 n1 = (PetscInt)local_n1; 1235 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1236 #endif 1237 break; 1238 case 2: 1239 #if defined(PETSC_USE_COMPLEX) 1240 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1241 /* 1242 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1243 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1244 */ 1245 n = (PetscInt)local_n0*dim[1]; 1246 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1247 #else 1248 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1249 1250 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1251 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1252 #endif 1253 break; 1254 case 3: 1255 #if defined(PETSC_USE_COMPLEX) 1256 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1257 1258 n = (PetscInt)local_n0*dim[1]*dim[2]; 1259 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1260 #else 1261 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1262 1263 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1264 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1265 #endif 1266 break; 1267 default: 1268 #if defined(PETSC_USE_COMPLEX) 1269 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1270 1271 n = (PetscInt)local_n0*partial_dim; 1272 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1273 #else 1274 temp = pdim[ndim-1]; 1275 1276 pdim[ndim-1] = temp/2 + 1; 1277 1278 fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1279 1280 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1281 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1282 1283 pdim[ndim-1] = temp; 1284 1285 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1286 #endif 1287 break; 1288 } 1289 } 1290 free(pdim); 1291 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1292 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1293 fft->data = (void*)fftw; 1294 1295 fft->n = n; 1296 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1297 fftw->partial_dim = partial_dim; 1298 1299 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1300 if (size == 1) { 1301 #if defined(PETSC_USE_64BIT_INDICES) 1302 fftw->iodims = (fftw_iodim64 *) malloc(sizeof(fftw_iodim64) * ndim); 1303 #else 1304 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1305 #endif 1306 } 1307 1308 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1309 1310 fftw->p_forward = NULL; 1311 fftw->p_backward = NULL; 1312 fftw->p_flag = FFTW_ESTIMATE; 1313 1314 if (size == 1) { 1315 A->ops->mult = MatMult_SeqFFTW; 1316 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1317 } else { 1318 A->ops->mult = MatMult_MPIFFTW; 1319 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1320 } 1321 fft->matdestroy = MatDestroy_FFTW; 1322 A->assembled = PETSC_TRUE; 1323 A->preallocated = PETSC_TRUE; 1324 1325 ierr = PetscObjectComposeFunction((PetscObject)A,"MatCreateVecsFFTW_C",MatCreateVecsFFTW_FFTW);CHKERRQ(ierr); 1326 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1327 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1328 1329 /* get runtime options */ 1330 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1331 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1332 if (flg) { 1333 fftw->p_flag = iplans[p_flag]; 1334 } 1335 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1336 PetscFunctionReturn(0); 1337 } 1338 1339 1340 1341 1342