1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 fftw_iodim *iodims; 15 PetscInt partial_dim; 16 fftw_plan p_forward,p_backward; 17 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 18 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 19 executed for the arrays with which the plan was created */ 20 } Mat_FFTW; 21 22 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 26 extern PetscErrorCode MatDestroy_FFTW(Mat); 27 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 28 29 /* MatMult_SeqFFTW performs forward DFT in parallel 30 Input parameter: 31 A - the matrix 32 x - the vector on which FDFT will be performed 33 34 Output parameter: 35 y - vector that stores result of FDFT 36 */ 37 #undef __FUNCT__ 38 #define __FUNCT__ "MatMult_SeqFFTW" 39 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 40 { 41 PetscErrorCode ierr; 42 Mat_FFT *fft = (Mat_FFT*)A->data; 43 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 44 PetscScalar *x_array,*y_array; 45 fftw_iodim *iodims; 46 PetscInt ndim = fft->ndim,*dim = fft->dim,i; 47 48 PetscFunctionBegin; 49 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 50 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 51 if (!fftw->p_forward) { /* create a plan, then excute it */ 52 switch (ndim) { 53 case 1: 54 #if defined(PETSC_USE_COMPLEX) 55 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 56 #else 57 fftw->p_forward = 
fftw_plan_dft_r2c_1d(dim[0],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 58 #endif 59 break; 60 case 2: 61 #if defined(PETSC_USE_COMPLEX) 62 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 63 #else 64 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 65 #endif 66 break; 67 case 3: 68 #if defined(PETSC_USE_COMPLEX) 69 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 70 #else 71 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 72 #endif 73 break; 74 default: 75 #if defined(PETSC_USE_COMPLEX) 76 iodims = (fftw_iodim*)fftw->iodims; 77 if (ndim) { 78 iodims[ndim-1].n = dim[ndim-1]; 79 iodims[ndim-1].is = iodims[ndim-1].os = 1; 80 for (i=ndim-2; i>=0; --i) { 81 iodims[i].n = dim[i]; 82 iodims[i].is = iodims[i].os = iodims[i+1].is * iodims[i+1].n; 83 } 84 } 85 fftw->p_forward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 86 #else 87 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double*)x_array,(fftw_complex*)y_array,fftw->p_flag); 88 #endif 89 break; 90 } 91 fftw->finarray = x_array; 92 fftw->foutarray = y_array; 93 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 94 planning should be done before x is initialized! 
See FFTW manual sec2.1 or sec4 */ 95 fftw_execute(fftw->p_forward); 96 } else { /* use existing plan */ 97 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 98 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 99 } else { 100 fftw_execute(fftw->p_forward); 101 } 102 } 103 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 104 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 105 PetscFunctionReturn(0); 106 } 107 108 /* MatMultTranspose_SeqFFTW performs serial backward DFT 109 Input parameter: 110 A - the matrix 111 x - the vector on which BDFT will be performed 112 113 Output parameter: 114 y - vector that stores result of BDFT 115 */ 116 117 #undef __FUNCT__ 118 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 119 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 120 { 121 PetscErrorCode ierr; 122 Mat_FFT *fft = (Mat_FFT*)A->data; 123 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 124 PetscScalar *x_array,*y_array; 125 PetscInt ndim=fft->ndim,*dim=fft->dim; 126 fftw_iodim *iodims=(fftw_iodim*)fftw->iodims; 127 128 PetscFunctionBegin; 129 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 130 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 131 if (!fftw->p_backward) { /* create a plan, then excute it */ 132 switch (ndim) { 133 case 1: 134 #if defined(PETSC_USE_COMPLEX) 135 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 136 #else 137 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 138 #endif 139 break; 140 case 2: 141 #if defined(PETSC_USE_COMPLEX) 142 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 143 #else 144 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 145 #endif 146 break; 147 case 3: 148 #if defined(PETSC_USE_COMPLEX) 149 
fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 150 #else 151 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 152 #endif 153 break; 154 default: 155 #if defined(PETSC_USE_COMPLEX) 156 fftw->p_backward = fftw_plan_guru_dft((int)ndim,iodims,0,NULL,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 157 #else 158 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double*)y_array,fftw->p_flag); 159 #endif 160 break; 161 } 162 fftw->binarray = x_array; 163 fftw->boutarray = y_array; 164 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 165 } else { /* use existing plan */ 166 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 167 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 168 } else { 169 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 170 } 171 } 172 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 173 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 174 PetscFunctionReturn(0); 175 } 176 177 /* MatMult_MPIFFTW performs forward DFT in parallel 178 Input parameter: 179 A - the matrix 180 x - the vector on which FDFT will be performed 181 182 Output parameter: 183 y - vector that stores result of FDFT 184 */ 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatMult_MPIFFTW" 187 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 188 { 189 PetscErrorCode ierr; 190 Mat_FFT *fft = (Mat_FFT*)A->data; 191 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 192 PetscScalar *x_array,*y_array; 193 PetscInt ndim=fft->ndim,*dim=fft->dim; 194 MPI_Comm comm; 195 196 PetscFunctionBegin; 197 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 198 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 199 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 200 if (!fftw->p_forward) { /* create a plan, then excute it */ 201 switch (ndim) { 
202 case 1: 203 #if defined(PETSC_USE_COMPLEX) 204 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 205 #else 206 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 207 #endif 208 break; 209 case 2: 210 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 211 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 212 #else 213 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 214 #endif 215 break; 216 case 3: 217 #if defined(PETSC_USE_COMPLEX) 218 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 219 #else 220 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 221 #endif 222 break; 223 default: 224 #if defined(PETSC_USE_COMPLEX) 225 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 226 #else 227 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double*)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 228 #endif 229 break; 230 } 231 fftw->finarray = x_array; 232 fftw->foutarray = y_array; 233 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 234 planning should be done before x is initialized! 
See FFTW manual sec2.1 or sec4 */ 235 fftw_execute(fftw->p_forward); 236 } else { /* use existing plan */ 237 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 238 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 239 } else { 240 fftw_execute(fftw->p_forward); 241 } 242 } 243 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 244 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 245 PetscFunctionReturn(0); 246 } 247 248 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 249 Input parameter: 250 A - the matrix 251 x - the vector on which BDFT will be performed 252 253 Output parameter: 254 y - vector that stores result of BDFT 255 */ 256 #undef __FUNCT__ 257 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 258 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 259 { 260 PetscErrorCode ierr; 261 Mat_FFT *fft = (Mat_FFT*)A->data; 262 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 263 PetscScalar *x_array,*y_array; 264 PetscInt ndim=fft->ndim,*dim=fft->dim; 265 MPI_Comm comm; 266 267 PetscFunctionBegin; 268 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 269 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 270 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 271 if (!fftw->p_backward) { /* create a plan, then excute it */ 272 switch (ndim) { 273 case 1: 274 #if defined(PETSC_USE_COMPLEX) 275 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 276 #else 277 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 278 #endif 279 break; 280 case 2: 281 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 282 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 283 #else 284 fftw->p_backward = 
fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 285 #endif 286 break; 287 case 3: 288 #if defined(PETSC_USE_COMPLEX) 289 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 290 #else 291 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 292 #endif 293 break; 294 default: 295 #if defined(PETSC_USE_COMPLEX) 296 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 297 #else 298 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double*)y_array,comm,FFTW_ESTIMATE); 299 #endif 300 break; 301 } 302 fftw->binarray = x_array; 303 fftw->boutarray = y_array; 304 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 305 } else { /* use existing plan */ 306 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 307 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 308 } else { 309 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 310 } 311 } 312 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 313 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 314 PetscFunctionReturn(0); 315 } 316 317 #undef __FUNCT__ 318 #define __FUNCT__ "MatDestroy_FFTW" 319 PetscErrorCode MatDestroy_FFTW(Mat A) 320 { 321 Mat_FFT *fft = (Mat_FFT*)A->data; 322 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 323 PetscErrorCode ierr; 324 325 PetscFunctionBegin; 326 fftw_destroy_plan(fftw->p_forward); 327 fftw_destroy_plan(fftw->p_backward); 328 if (fftw->iodims) { 329 /* ierr = PetscFree(fftw->iodims);CHKERRQ(ierr); */ 330 free(fftw->iodims); 331 } 332 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 333 ierr = PetscFree(fft->data);CHKERRQ(ierr); 334 fftw_mpi_cleanup(); 335 PetscFunctionReturn(0); 336 
} 337 338 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 339 #undef __FUNCT__ 340 #define __FUNCT__ "VecDestroy_MPIFFTW" 341 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 342 { 343 PetscErrorCode ierr; 344 PetscScalar *array; 345 346 PetscFunctionBegin; 347 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 348 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 349 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 350 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 351 PetscFunctionReturn(0); 352 } 353 354 #undef __FUNCT__ 355 #define __FUNCT__ "MatGetVecsFFTW" 356 /*@ 357 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 358 parallel layout determined by FFTW 359 360 Collective on Mat 361 362 Input Parameter: 363 . A - the matrix 364 365 Output Parameter: 366 + x - (optional) input vector of forward FFTW 367 - y - (optional) output vector of forward FFTW 368 - z - (optional) output vector of backward FFTW 369 370 Level: advanced 371 372 Note: The parallel layout of output of forward FFTW is always same as the input 373 of the backward FFTW. But parallel layout of the input vector of forward 374 FFTW might not be same as the output of backward FFTW. 375 Also note that we need to provide enough space while doing parallel real transform. 376 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 377 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 378 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 379 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 380 zeros if it is odd only one zero is needed. 381 Lastly one needs some scratch space at the end of data set in each process. alloc_local 382 figures out how much space is needed, i.e. it figures out the data+scratch space for 383 each processor and returns that. 

   .seealso: MatCreateFFTW()
@*/
PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to MatGetVecsFFTW_FFTW() when the method is composed on A */
  ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetVecsFFTW_FFTW"
/* MatGetVecsFFTW_FFTW - creates vectors (data arrays from fftw_malloc) whose local sizes match
   the layouts FFTW reports via the fftw_mpi_local_size_* routines; each vector gets
   VecDestroy_MPIFFTW installed so the fftw_malloc'ed array is freed correctly. */
PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout)
{
  PetscErrorCode ierr;
  PetscMPIInt    size,rank;
  MPI_Comm       comm;
  Mat_FFT        *fft  = (Mat_FFT*)A->data;
  Mat_FFTW       *fftw = (Mat_FFTW*)fft->data;
  PetscInt       N     = fft->N;
  PetscInt       ndim  = fft->ndim,*dim = fft->dim,n = fft->n;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidType(A,1);
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr);
  if (size == 1) { /* sequential case */
#if defined(PETSC_USE_COMPLEX)
    if (fin)  {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);}
#else
    if (fin)  {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);}
    if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);}
    if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);}
#endif
  } else { /* parallel cases */
    ptrdiff_t    alloc_local,local_n0,local_0_start;
    ptrdiff_t    local_n1;
    fftw_complex *data_fout;
    ptrdiff_t    local_1_start;
#if defined(PETSC_USE_COMPLEX)
    fftw_complex *data_fin,*data_bout;
#else
    double       *data_finr,*data_boutr;
    PetscInt     n1,N1;
    ptrdiff_t    temp;
#endif

    switch (ndim) {
    case 1:
#if !defined(PETSC_USE_COMPLEX)
      SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform");
#else
      /* forward and backward 1d transforms may distribute the data differently, hence two local_size queries */
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start);
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      break;
#endif
    case 2:
#if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */
      /* real transform: last dimension padded to 2*(dim[1]/2+1) doubles, transposed output layout */
      alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      /* Get local size */
      alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    case 3:
#if !defined(PETSC_USE_COMPLEX)
      alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1);
      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    default:
#if !defined(PETSC_USE_COMPLEX)
      /* temporarily replace the last dimension by dim/2+1 for the local-size query, then restore it */
      temp = (fftw->dim_fftw)[fftw->ndim_fftw-1];

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1;

      alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start);
      N1          = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp);

      (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp;

      if (fin) {
        data_finr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr      = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_boutr = (double*)fftw_malloc(sizeof(double)*alloc_local*2);
        ierr       = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }

      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#else
      alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start);
      if (fin) {
        data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr     = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr);

        (*fin)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (fout) {
        data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr);

        (*fout)->ops->destroy = VecDestroy_MPIFFTW;
      }
      if (bout) {
        data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local);
        ierr      = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr);

        (*bout)->ops->destroy = VecDestroy_MPIFFTW;
      }
#endif
      break;
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "VecScatterPetscToFFTW"
/*@
   VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block.

   Collective on Mat

   Input Parameters:
+  A - FFTW matrix
-  x - the PETSc vector

   Output Parameters:
.  y - the FFTW vector

   Options Database Keys:
. -mat_fftw_plannerflags - set FFTW planner flags

   Level: intermediate

   Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when
         one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra
         zeros. This routine does that job by scattering operation.
634 635 .seealso: VecScatterFFTWToPetsc() 636 @*/ 637 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 638 { 639 PetscErrorCode ierr; 640 641 PetscFunctionBegin; 642 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 643 PetscFunctionReturn(0); 644 } 645 646 #undef __FUNCT__ 647 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 648 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 649 { 650 PetscErrorCode ierr; 651 MPI_Comm comm; 652 Mat_FFT *fft = (Mat_FFT*)A->data; 653 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 654 PetscInt N =fft->N; 655 PetscInt ndim =fft->ndim,*dim=fft->dim; 656 PetscInt low; 657 PetscMPIInt rank,size; 658 PetscInt vsize,vsize1; 659 ptrdiff_t local_n0,local_0_start; 660 ptrdiff_t local_n1,local_1_start; 661 VecScatter vecscat; 662 IS list1,list2; 663 #if !defined(PETSC_USE_COMPLEX) 664 PetscInt i,j,k,partial_dim; 665 PetscInt *indx1, *indx2, tempindx, tempindx1; 666 PetscInt N1, n1,NM; 667 ptrdiff_t temp; 668 #endif 669 670 PetscFunctionBegin; 671 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 672 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 673 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 674 ierr = VecGetOwnershipRange(y,&low,NULL); 675 676 if (size==1) { 677 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 678 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 679 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 680 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 681 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 682 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 683 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 684 ierr = ISDestroy(&list1);CHKERRQ(ierr); 685 } else { 686 switch (ndim) { 687 case 1: 688 #if defined(PETSC_USE_COMPLEX) 689 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 690 691 ierr = 
ISCreateStride(comm,local_n0,local_0_start,1,&list1); 692 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 693 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 694 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 695 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 696 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 697 ierr = ISDestroy(&list1);CHKERRQ(ierr); 698 ierr = ISDestroy(&list2);CHKERRQ(ierr); 699 #else 700 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 701 #endif 702 break; 703 case 2: 704 #if defined(PETSC_USE_COMPLEX) 705 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 706 707 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 708 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 709 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 710 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 711 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 712 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 713 ierr = ISDestroy(&list1);CHKERRQ(ierr); 714 ierr = ISDestroy(&list2);CHKERRQ(ierr); 715 #else 716 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 717 718 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 719 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 720 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 721 722 if (dim[1]%2==0) { 723 NM = dim[1]+2; 724 } else { 725 NM = dim[1]+1; 726 } 727 for (i=0; i<local_n0; i++) { 728 for (j=0; j<dim[1]; j++) { 729 tempindx = i*dim[1] + j; 730 tempindx1 = i*NM + j; 731 732 indx1[tempindx]=local_0_start*dim[1]+tempindx; 733 indx2[tempindx]=low+tempindx1; 734 } 735 } 736 737 ierr = 
ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 738 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 739 740 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 741 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 743 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 744 ierr = ISDestroy(&list1);CHKERRQ(ierr); 745 ierr = ISDestroy(&list2);CHKERRQ(ierr); 746 ierr = PetscFree(indx1);CHKERRQ(ierr); 747 ierr = PetscFree(indx2);CHKERRQ(ierr); 748 #endif 749 break; 750 751 case 3: 752 #if defined(PETSC_USE_COMPLEX) 753 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 754 755 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 756 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 757 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 758 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 759 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 760 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 761 ierr = ISDestroy(&list1);CHKERRQ(ierr); 762 ierr = ISDestroy(&list2);CHKERRQ(ierr); 763 #else 764 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 765 766 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 767 768 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 769 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 770 771 if (dim[2]%2==0) NM = dim[2]+2; 772 else NM = dim[2]+1; 773 774 for (i=0; i<local_n0; i++) { 775 for (j=0; j<dim[1]; j++) { 776 for (k=0; k<dim[2]; k++) { 777 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 778 tempindx1 = i*dim[1]*NM + 
j*NM + k; 779 780 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 781 indx2[tempindx]=low+tempindx1; 782 } 783 } 784 } 785 786 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 787 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 788 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 789 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 790 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 791 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 792 ierr = ISDestroy(&list1);CHKERRQ(ierr); 793 ierr = ISDestroy(&list2);CHKERRQ(ierr); 794 ierr = PetscFree(indx1);CHKERRQ(ierr); 795 ierr = PetscFree(indx2);CHKERRQ(ierr); 796 #endif 797 break; 798 799 default: 800 #if defined(PETSC_USE_COMPLEX) 801 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 802 803 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 804 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 805 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 806 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 807 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 808 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 809 ierr = ISDestroy(&list1);CHKERRQ(ierr); 810 ierr = ISDestroy(&list2);CHKERRQ(ierr); 811 #else 812 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 813 814 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 815 816 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 817 818 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 819 820 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 821 822 partial_dim = fftw->partial_dim; 823 824 ierr = 
PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 825 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 826 827 if (dim[ndim-1]%2==0) NM = 2; 828 else NM = 1; 829 830 j = low; 831 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 832 indx1[i] = local_0_start*partial_dim + i; 833 indx2[i] = j; 834 if (k%dim[ndim-1]==0) j+=NM; 835 j++; 836 } 837 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 838 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 839 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 840 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 841 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 842 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 843 ierr = ISDestroy(&list1);CHKERRQ(ierr); 844 ierr = ISDestroy(&list2);CHKERRQ(ierr); 845 ierr = PetscFree(indx1);CHKERRQ(ierr); 846 ierr = PetscFree(indx2);CHKERRQ(ierr); 847 #endif 848 break; 849 } 850 } 851 PetscFunctionReturn(0); 852 } 853 854 #undef __FUNCT__ 855 #define __FUNCT__ "VecScatterFFTWToPetsc" 856 /*@ 857 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 858 859 Collective on Mat 860 861 Input Parameters: 862 + A - FFTW matrix 863 - x - FFTW vector 864 865 Output Parameters: 866 . y - PETSc vector 867 868 Level: intermediate 869 870 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 871 VecScatterFFTWToPetsc removes those extra zeros. 
872 873 .seealso: VecScatterPetscToFFTW() 874 @*/ 875 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 876 { 877 PetscErrorCode ierr; 878 879 PetscFunctionBegin; 880 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 881 PetscFunctionReturn(0); 882 } 883 884 #undef __FUNCT__ 885 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 886 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 887 { 888 PetscErrorCode ierr; 889 MPI_Comm comm; 890 Mat_FFT *fft = (Mat_FFT*)A->data; 891 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 892 PetscInt N = fft->N; 893 PetscInt ndim = fft->ndim,*dim=fft->dim; 894 PetscInt low; 895 PetscMPIInt rank,size; 896 ptrdiff_t local_n0,local_0_start; 897 ptrdiff_t local_n1,local_1_start; 898 VecScatter vecscat; 899 IS list1,list2; 900 #if !defined(PETSC_USE_COMPLEX) 901 PetscInt i,j,k,partial_dim; 902 PetscInt *indx1, *indx2, tempindx, tempindx1; 903 PetscInt N1, n1,NM; 904 ptrdiff_t temp; 905 #endif 906 907 PetscFunctionBegin; 908 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 909 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 910 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 911 ierr = VecGetOwnershipRange(x,&low,NULL);CHKERRQ(ierr); 912 913 if (size==1) { 914 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 915 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 916 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 917 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 918 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 919 ierr = ISDestroy(&list1);CHKERRQ(ierr); 920 921 } else { 922 switch (ndim) { 923 case 1: 924 #if defined(PETSC_USE_COMPLEX) 925 fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 926 927 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 928 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 929 ierr = 
VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 930 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 931 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 932 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 933 ierr = ISDestroy(&list1);CHKERRQ(ierr); 934 ierr = ISDestroy(&list2);CHKERRQ(ierr); 935 #else 936 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 937 #endif 938 break; 939 case 2: 940 #if defined(PETSC_USE_COMPLEX) 941 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 942 943 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 944 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 945 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 946 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 947 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 948 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 949 ierr = ISDestroy(&list1);CHKERRQ(ierr); 950 ierr = ISDestroy(&list2);CHKERRQ(ierr); 951 #else 952 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 953 954 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 955 956 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 957 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 958 959 if (dim[1]%2==0) NM = dim[1]+2; 960 else NM = dim[1]+1; 961 962 for (i=0; i<local_n0; i++) { 963 for (j=0; j<dim[1]; j++) { 964 tempindx = i*dim[1] + j; 965 tempindx1 = i*NM + j; 966 967 indx1[tempindx]=local_0_start*dim[1]+tempindx; 968 indx2[tempindx]=low+tempindx1; 969 } 970 } 971 972 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 973 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 974 975 ierr = 
VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 976 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 977 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 978 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 979 ierr = ISDestroy(&list1);CHKERRQ(ierr); 980 ierr = ISDestroy(&list2);CHKERRQ(ierr); 981 ierr = PetscFree(indx1);CHKERRQ(ierr); 982 ierr = PetscFree(indx2);CHKERRQ(ierr); 983 #endif 984 break; 985 case 3: 986 #if defined(PETSC_USE_COMPLEX) 987 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 988 989 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 990 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 991 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 992 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 993 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 994 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 995 ierr = ISDestroy(&list1);CHKERRQ(ierr); 996 ierr = ISDestroy(&list2);CHKERRQ(ierr); 997 #else 998 999 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1000 1001 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1002 1003 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1004 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1005 1006 if (dim[2]%2==0) NM = dim[2]+2; 1007 else NM = dim[2]+1; 1008 1009 for (i=0; i<local_n0; i++) { 1010 for (j=0; j<dim[1]; j++) { 1011 for (k=0; k<dim[2]; k++) { 1012 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1013 tempindx1 = i*dim[1]*NM + j*NM + k; 1014 1015 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1016 indx2[tempindx]=low+tempindx1; 1017 } 1018 } 1019 } 1020 1021 ierr = 
ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1022 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1023 1024 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1025 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1026 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1027 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1028 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1029 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1030 ierr = PetscFree(indx1);CHKERRQ(ierr); 1031 ierr = PetscFree(indx2);CHKERRQ(ierr); 1032 #endif 1033 break; 1034 default: 1035 #if defined(PETSC_USE_COMPLEX) 1036 fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1037 1038 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1039 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 1040 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1041 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1042 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1044 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1045 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1046 #else 1047 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1048 1049 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1050 1051 fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1052 1053 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1054 1055 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1056 1057 partial_dim = fftw->partial_dim; 1058 1059 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1060 ierr = 
PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1061 1062 if (dim[ndim-1]%2==0) NM = 2; 1063 else NM = 1; 1064 1065 j = low; 1066 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim; i++,k++) { 1067 indx1[i] = local_0_start*partial_dim + i; 1068 indx2[i] = j; 1069 if (k%dim[ndim-1]==0) j+=NM; 1070 j++; 1071 } 1072 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1073 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1074 1075 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1076 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1077 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1078 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1079 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1080 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1081 ierr = PetscFree(indx1);CHKERRQ(ierr); 1082 ierr = PetscFree(indx2);CHKERRQ(ierr); 1083 #endif 1084 break; 1085 } 1086 } 1087 PetscFunctionReturn(0); 1088 } 1089 1090 #undef __FUNCT__ 1091 #define __FUNCT__ "MatCreate_FFTW" 1092 /* 1093 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1094 1095 Options Database Keys: 1096 + -mat_fftw_plannerflags - set FFTW planner flags 1097 1098 Level: intermediate 1099 1100 */ 1101 PETSC_EXTERN PetscErrorCode MatCreate_FFTW(Mat A) 1102 { 1103 PetscErrorCode ierr; 1104 MPI_Comm comm; 1105 Mat_FFT *fft=(Mat_FFT*)A->data; 1106 Mat_FFTW *fftw; 1107 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim=fft->dim; 1108 const char *plans[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1109 unsigned iplans[]={FFTW_ESTIMATE,FFTW_MEASURE,FFTW_PATIENT,FFTW_EXHAUSTIVE}; 1110 PetscBool flg; 1111 PetscInt p_flag,partial_dim=1,ctr; 1112 PetscMPIInt size,rank; 1113 ptrdiff_t *pdim; 1114 ptrdiff_t local_n1,local_1_start; 1115 #if !defined(PETSC_USE_COMPLEX) 1116 
ptrdiff_t temp; 1117 PetscInt N1,tot_dim; 1118 #else 1119 PetscInt n1; 1120 #endif 1121 1122 PetscFunctionBegin; 1123 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 1124 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1125 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1126 1127 fftw_mpi_init(); 1128 pdim = (ptrdiff_t*)calloc(ndim,sizeof(ptrdiff_t)); 1129 pdim[0] = dim[0]; 1130 #if !defined(PETSC_USE_COMPLEX) 1131 tot_dim = 2*dim[0]; 1132 #endif 1133 for (ctr=1; ctr<ndim; ctr++) { 1134 partial_dim *= dim[ctr]; 1135 pdim[ctr] = dim[ctr]; 1136 #if !defined(PETSC_USE_COMPLEX) 1137 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1138 else tot_dim *= dim[ctr]; 1139 #endif 1140 } 1141 1142 if (size == 1) { 1143 #if defined(PETSC_USE_COMPLEX) 1144 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1145 n = N; 1146 #else 1147 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1148 n = tot_dim; 1149 #endif 1150 1151 } else { 1152 ptrdiff_t local_n0,local_0_start; 1153 switch (ndim) { 1154 case 1: 1155 #if !defined(PETSC_USE_COMPLEX) 1156 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1157 #else 1158 fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1159 1160 n = (PetscInt)local_n0; 1161 n1 = (PetscInt)local_n1; 1162 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1163 #endif 1164 break; 1165 case 2: 1166 #if defined(PETSC_USE_COMPLEX) 1167 fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1168 /* 1169 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1170 PetscSynchronizedFlush(comm,PETSC_STDOUT); 1171 */ 1172 n = (PetscInt)local_n0*dim[1]; 1173 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1174 #else 1175 fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 
1176 1177 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1178 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1179 #endif 1180 break; 1181 case 3: 1182 #if defined(PETSC_USE_COMPLEX) 1183 fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1184 1185 n = (PetscInt)local_n0*dim[1]*dim[2]; 1186 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1187 #else 1188 fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1189 1190 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1191 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1192 #endif 1193 break; 1194 default: 1195 #if defined(PETSC_USE_COMPLEX) 1196 fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1197 1198 n = (PetscInt)local_n0*partial_dim; 1199 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1200 #else 1201 temp = pdim[ndim-1]; 1202 1203 pdim[ndim-1] = temp/2 + 1; 1204 1205 fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1206 1207 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1208 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1209 1210 pdim[ndim-1] = temp; 1211 1212 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1213 #endif 1214 break; 1215 } 1216 } 1217 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1218 ierr = PetscNewLog(A,&fftw);CHKERRQ(ierr); 1219 fft->data = (void*)fftw; 1220 1221 fft->n = n; 1222 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1223 fftw->partial_dim = partial_dim; 1224 1225 ierr = PetscMalloc1(ndim, &(fftw->dim_fftw));CHKERRQ(ierr); 1226 if (size == 1) { 1227 /* ierr = PetscMalloc(ndim, &(fftw->iodims));CHKERRQ(ierr); error! 
*/ 1228 /* ierr = PetscMalloc(ndim*sizeof(fftw_iodim),&(fftw->iodims));CHKERRQ(ierr); -not sure if this is ok */ 1229 fftw->iodims = (fftw_iodim *) malloc(sizeof(fftw_iodim) * ndim); 1230 } 1231 1232 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1233 1234 fftw->p_forward = 0; 1235 fftw->p_backward = 0; 1236 fftw->p_flag = FFTW_ESTIMATE; 1237 1238 if (size == 1) { 1239 A->ops->mult = MatMult_SeqFFTW; 1240 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1241 } else { 1242 A->ops->mult = MatMult_MPIFFTW; 1243 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1244 } 1245 fft->matdestroy = MatDestroy_FFTW; 1246 A->assembled = PETSC_TRUE; 1247 A->preallocated = PETSC_TRUE; 1248 1249 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetVecsFFTW_C",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1250 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterPetscToFFTW_C",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1251 ierr = PetscObjectComposeFunction((PetscObject)A,"VecScatterFFTWToPetsc_C",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1252 1253 /* get runtime options */ 1254 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1255 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",plans,4,plans[0],&p_flag,&flg);CHKERRQ(ierr); 1256 if (flg) { 1257 fftw->p_flag = iplans[p_flag]; 1258 } 1259 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1260 PetscFunctionReturn(0); 1261 } 1262 1263 1264 1265 1266