1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 28 /* MatMult_SeqFFTW performs forward DFT in parallel 29 Input parameter: 30 A - the matrix 31 x - the vector on which FDFT will be performed 32 33 Output parameter: 34 y - vector that stores result of FDFT 35 */ 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatMult_SeqFFTW" 38 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39 { 40 PetscErrorCode ierr; 41 Mat_FFT *fft = (Mat_FFT*)A->data; 42 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43 PetscScalar *x_array,*y_array; 44 PetscInt ndim=fft->ndim,*dim=fft->dim; 45 46 PetscFunctionBegin; 47 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 48 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 49 if (!fftw->p_forward){ /* create a plan, then excute it */ 50 switch (ndim){ 51 case 1: 52 #if defined(PETSC_USE_COMPLEX) 53 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 #else 55 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 56 #endif 57 break; 58 case 2: 59 #if defined(PETSC_USE_COMPLEX) 60 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 61 #else 62 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 63 #endif 64 break; 65 case 3: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 default: 73 #if defined(PETSC_USE_COMPLEX) 74 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 75 #else 76 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 77 #endif 78 break; 79 } 80 fftw->finarray = x_array; 81 fftw->foutarray = y_array; 82 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 83 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 84 fftw_execute(fftw->p_forward); 85 } else { /* use existing plan */ 86 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 87 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 88 } else { 89 fftw_execute(fftw->p_forward); 90 } 91 } 92 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 93 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 /* MatMultTranspose_SeqFFTW performs serial backward DFT 98 Input parameter: 99 A - the matrix 100 x - the vector on which BDFT will be performed 101 102 Output parameter: 103 y - vector that stores result of BDFT 104 */ 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 108 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 109 { 110 PetscErrorCode ierr; 111 Mat_FFT *fft = (Mat_FFT*)A->data; 112 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 113 PetscScalar *x_array,*y_array; 114 PetscInt ndim=fft->ndim,*dim=fft->dim; 115 116 PetscFunctionBegin; 117 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 118 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 119 if (!fftw->p_backward){ /* create a plan, then excute it */ 120 switch (ndim){ 121 case 1: 122 #if defined(PETSC_USE_COMPLEX) 123 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 124 #else 125 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 126 #endif 127 break; 128 case 2: 129 #if defined(PETSC_USE_COMPLEX) 130 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 131 #else 132 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 133 #endif 134 break; 135 case 3: 136 #if defined(PETSC_USE_COMPLEX) 137 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 138 #else 139 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 140 #endif 141 break; 142 default: 143 #if defined(PETSC_USE_COMPLEX) 144 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 145 #else 146 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 147 #endif 148 break; 149 } 150 fftw->binarray = x_array; 151 fftw->boutarray = y_array; 152 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 153 } else { /* use existing plan */ 154 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 155 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 156 } else { 157 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 158 } 159 } 160 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 161 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 /* MatMult_MPIFFTW performs forward DFT in parallel 166 Input parameter: 167 A - the matrix 168 x - the vector on which FDFT will be performed 169 170 Output parameter: 171 y - vector that stores result of FDFT 172 */ 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatMult_MPIFFTW" 175 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 176 { 177 PetscErrorCode ierr; 178 Mat_FFT *fft = (Mat_FFT*)A->data; 179 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 180 PetscScalar *x_array,*y_array; 181 PetscInt ndim=fft->ndim,*dim=fft->dim; 182 MPI_Comm comm=((PetscObject)A)->comm; 183 184 PetscFunctionBegin; 185 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 186 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 187 if (!fftw->p_forward){ /* create a plan, then excute it */ 188 switch (ndim){ 189 case 1: 190 #if defined(PETSC_USE_COMPLEX) 191 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 192 #else 193 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 194 #endif 195 break; 196 case 2: 197 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 198 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 199 #else 200 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 201 #endif 202 break; 203 case 3: 204 #if defined(PETSC_USE_COMPLEX) 205 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 206 #else 207 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 208 #endif 209 break; 210 default: 211 #if defined(PETSC_USE_COMPLEX) 212 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 213 #else 214 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 215 #endif 216 break; 217 } 218 fftw->finarray = x_array; 219 fftw->foutarray = y_array; 220 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 221 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 222 fftw_execute(fftw->p_forward); 223 } else { /* use existing plan */ 224 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 225 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 226 } else { 227 fftw_execute(fftw->p_forward); 228 } 229 } 230 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 231 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 232 PetscFunctionReturn(0); 233 } 234 235 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 236 Input parameter: 237 A - the matrix 238 x - the vector on which BDFT will be performed 239 240 Output parameter: 241 y - vector that stores result of BDFT 242 */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 245 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 246 { 247 PetscErrorCode ierr; 248 Mat_FFT *fft = (Mat_FFT*)A->data; 249 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 250 PetscScalar *x_array,*y_array; 251 PetscInt ndim=fft->ndim,*dim=fft->dim; 252 MPI_Comm comm=((PetscObject)A)->comm; 253 254 PetscFunctionBegin; 255 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 256 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 257 if (!fftw->p_backward){ /* create a plan, then excute it */ 258 switch (ndim){ 259 case 1: 260 #if defined(PETSC_USE_COMPLEX) 261 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 262 #else 263 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 264 #endif 265 break; 266 case 2: 267 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 268 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 269 #else 270 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 271 #endif 272 break; 273 case 3: 274 #if defined(PETSC_USE_COMPLEX) 275 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 276 #else 277 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 278 #endif 279 break; 280 default: 281 #if defined(PETSC_USE_COMPLEX) 282 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 283 #else 284 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 285 #endif 286 break; 287 } 288 fftw->binarray = x_array; 289 fftw->boutarray = y_array; 290 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 291 } else { /* use existing plan */ 292 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 293 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 294 } else { 295 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 296 } 297 } 298 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 299 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatDestroy_FFTW" 305 PetscErrorCode MatDestroy_FFTW(Mat A) 306 { 307 Mat_FFT *fft = (Mat_FFT*)A->data; 308 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 309 PetscErrorCode ierr; 310 311 PetscFunctionBegin; 312 fftw_destroy_plan(fftw->p_forward); 313 fftw_destroy_plan(fftw->p_backward); 314 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 315 ierr = PetscFree(fft->data);CHKERRQ(ierr); 316 PetscFunctionReturn(0); 317 } 318 319 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 320 #undef __FUNCT__ 321 #define __FUNCT__ "VecDestroy_MPIFFTW" 322 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 323 { 324 PetscErrorCode ierr; 325 PetscScalar *array; 326 327 PetscFunctionBegin; 328 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 329 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 330 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 331 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 332 PetscFunctionReturn(0); 333 } 334 335 #undef __FUNCT__ 336 #define __FUNCT__ "MatGetVecsFFTW" 337 /*@ 338 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 339 parallel layout determined by FFTW 340 341 Collective on Mat 342 343 Input Parameter: 344 . A - the matrix 345 346 Output Parameter: 347 + fin - (optional) input vector of forward FFTW 348 - fout - (optional) output vector of forward FFTW 349 - bout - (optional) output vector of backward FFTW 350 351 Level: advanced 352 353 Note: The parallel layout of output of forward FFTW is always same as the input 354 of the backward FFTW. But parallel layout of the input vector of forward 355 FFTW might not be same as the output of backward FFTW. 356 Also note that we need to provide enough space while doing parallel real transform. 357 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 358 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 359 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 360 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 361 zeros if it is odd only one zero is needed. 362 Lastly one needs some scratch space at the end of data set in each process. alloc_local 363 figures out how much space is needed, i.e. it figures out the data+scratch space for 364 each processor and returns that. 365 366 .seealso: MatCreateFFTW() 367 @*/ 368 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 369 { 370 PetscErrorCode ierr; 371 372 PetscFunctionBegin; 373 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 374 PetscFunctionReturn(0); 375 } 376 377 EXTERN_C_BEGIN 378 #undef __FUNCT__ 379 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 380 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 381 { 382 PetscErrorCode ierr; 383 PetscMPIInt size,rank; 384 MPI_Comm comm=((PetscObject)A)->comm; 385 Mat_FFT *fft = (Mat_FFT*)A->data; 386 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 387 PetscInt N=fft->N; 388 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 389 390 PetscFunctionBegin; 391 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 392 PetscValidType(A,1); 393 394 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 395 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 396 if (size == 1){ /* sequential case */ 397 #if defined(PETSC_USE_COMPLEX) 398 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 399 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 400 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 401 #else 402 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 403 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 404 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 405 #endif 406 } else { /* parallel cases */ 407 ptrdiff_t alloc_local,local_n0,local_0_start; 408 ptrdiff_t local_n1; 409 fftw_complex *data_fout; 410 ptrdiff_t local_1_start; 411 #if defined(PETSC_USE_COMPLEX) 412 fftw_complex *data_fin,*data_bout; 413 #else 414 double *data_finr,*data_boutr; 415 PetscInt n1,N1,vsize; 416 ptrdiff_t temp; 417 #endif 418 419 switch (ndim){ 420 case 1: 421 #if !defined(PETSC_USE_COMPLEX) 422 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 423 #else 424 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 425 if (fin) { 426 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 427 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 428 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 429 } 430 if (fout) { 431 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 432 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 433 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 434 } 435 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 436 if (bout) { 437 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 438 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 439 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 440 } 441 break; 442 #endif 443 case 2: 444 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 445 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 446 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 447 if (fin) { 448 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 449 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 450 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 451 } 452 if (bout) { 453 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 454 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 455 ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 456 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 457 } 458 if (fout) { 459 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 460 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 461 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 462 } 463 #else 464 /* Get local size */ 465 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 466 if (fin) { 467 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 468 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 469 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 470 } 471 if (fout) { 472 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 473 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 474 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 475 } 476 if (bout) { 477 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 478 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 479 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 480 } 481 #endif 482 break; 483 case 3: 484 #if !defined(PETSC_USE_COMPLEX) 485 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 486 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 487 if (fin) { 488 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 489 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 490 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 491 } 492 if (bout) { 493 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 494 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 495 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 496 } 497 498 if (fout) { 499 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 500 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 501 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 502 } 503 #else 504 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 505 if (fin) { 506 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 507 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 508 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 509 } 510 if (fout) { 511 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 512 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 513 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 514 } 515 if (bout) { 516 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 517 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 518 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 #endif 521 break; 522 default: 523 #if !defined(PETSC_USE_COMPLEX) 524 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 525 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 526 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 527 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 528 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 529 530 if (fin) { 531 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 532 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 533 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 534 } 535 if (bout) { 536 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 537 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 538 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 539 } 540 541 if (fout) { 542 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 543 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 544 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 545 } 546 #else 547 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 548 if (fin) { 549 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 550 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 551 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 if (fout) { 554 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 555 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 556 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 557 } 558 if (bout) { 559 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 560 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 561 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 562 } 563 #endif 564 break; 565 } 566 } 567 PetscFunctionReturn(0); 568 } 569 EXTERN_C_END 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "VecScatterPetscToFFTW" 573 /*@ 574 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 575 576 Collective on Mat 577 578 Input Parameters: 579 + A - FFTW matrix 580 - x - the PETSc vector 581 582 Output Parameters: 583 . y - the FFTW vector 584 585 Options Database Keys: 586 . -mat_fftw_plannerflags - set FFTW planner flags 587 588 Level: intermediate 589 590 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 591 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 592 zeros. This routine does that job by scattering operation. 593 594 .seealso: VecScatterFFTWToPetsc() 595 @*/ 596 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 597 { 598 PetscErrorCode ierr; 599 600 PetscFunctionBegin; 601 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 605 EXTERN_C_BEGIN 606 #undef __FUNCT__ 607 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 608 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 609 { 610 PetscErrorCode ierr; 611 MPI_Comm comm=((PetscObject)A)->comm; 612 Mat_FFT *fft = (Mat_FFT*)A->data; 613 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 614 PetscInt N=fft->N; 615 PetscInt ndim=fft->ndim,*dim=fft->dim; 616 PetscInt low; 617 PetscInt rank,size,vsize,vsize1; 618 ptrdiff_t alloc_local,local_n0,local_0_start; 619 ptrdiff_t local_n1,local_1_start; 620 VecScatter vecscat; 621 IS list1,list2; 622 #if !defined(PETSC_USE_COMPLEX) 623 PetscInt i,j,k,partial_dim; 624 PetscInt *indx1, *indx2, tempindx, tempindx1; 625 PetscInt N1, n1 ,NM; 626 ptrdiff_t temp; 627 #endif 628 629 PetscFunctionBegin; 630 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 631 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 632 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 633 634 if (size==1){ 635 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 636 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 637 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 638 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 639 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 640 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 641 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 642 ierr = ISDestroy(&list1);CHKERRQ(ierr); 643 } else { 644 switch (ndim){ 645 case 1: 646 #if defined(PETSC_USE_COMPLEX) 647 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 648 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 649 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 650 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 651 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 652 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 653 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 654 ierr = ISDestroy(&list1);CHKERRQ(ierr); 655 ierr = ISDestroy(&list2);CHKERRQ(ierr); 656 #else 657 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 658 #endif 659 break; 660 case 2: 661 #if defined(PETSC_USE_COMPLEX) 662 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 663 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 664 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 665 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 666 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 667 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 669 ierr = ISDestroy(&list1);CHKERRQ(ierr); 670 ierr = ISDestroy(&list2);CHKERRQ(ierr); 671 #else 672 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 673 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 674 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 675 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 676 677 if (dim[1]%2==0){ 678 NM = dim[1]+2; 679 } else { 680 NM = dim[1]+1; 681 } 682 for (i=0;i<local_n0;i++){ 683 for (j=0;j<dim[1];j++){ 684 tempindx = i*dim[1] + j; 685 tempindx1 = i*NM + j; 686 indx1[tempindx]=local_0_start*dim[1]+tempindx; 687 indx2[tempindx]=low+tempindx1; 688 } 689 } 690 691 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 692 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 693 694 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 695 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 696 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 697 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 698 ierr = ISDestroy(&list1);CHKERRQ(ierr); 699 ierr = ISDestroy(&list2);CHKERRQ(ierr); 700 ierr = PetscFree(indx1);CHKERRQ(ierr); 701 ierr = PetscFree(indx2);CHKERRQ(ierr); 702 #endif 703 break; 704 705 case 3: 706 #if defined(PETSC_USE_COMPLEX) 707 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 708 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 709 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 710 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 711 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 712 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 713 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 714 ierr = ISDestroy(&list1);CHKERRQ(ierr); 715 ierr = ISDestroy(&list2);CHKERRQ(ierr); 716 #else 717 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 718 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 719 720 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 721 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 722 723 if (dim[2]%2==0) 724 NM = dim[2]+2; 725 else 726 NM = dim[2]+1; 727 728 for (i=0;i<local_n0;i++){ 729 for (j=0;j<dim[1];j++){ 730 for (k=0;k<dim[2];k++){ 731 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 732 tempindx1 = i*dim[1]*NM + j*NM + k; 733 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 734 indx2[tempindx]=low+tempindx1; 735 } 736 } 737 } 738 739 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 740 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 741 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 742 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 743 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 744 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 745 ierr = ISDestroy(&list1);CHKERRQ(ierr); 746 ierr = ISDestroy(&list2);CHKERRQ(ierr); 747 ierr = PetscFree(indx1);CHKERRQ(ierr); 748 ierr = PetscFree(indx2);CHKERRQ(ierr); 749 #endif 750 break; 751 752 default: 753 #if defined(PETSC_USE_COMPLEX) 754 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 755 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 756 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 757 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 758 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 759 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 760 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 761 ierr = ISDestroy(&list1);CHKERRQ(ierr); 762 ierr = ISDestroy(&list2);CHKERRQ(ierr); 763 #else 764 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 765 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 766 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 767 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 768 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 769 770 partial_dim = fftw->partial_dim; 771 772 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 773 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 774 775 if (dim[ndim-1]%2==0) 776 NM = 2; 777 else 778 NM = 1; 779 780 j = low; 781 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++){ 782 indx1[i] = local_0_start*partial_dim + i; 783 indx2[i] = j; 784 if (k%dim[ndim-1]==0){ j+=NM;} 785 j++; 786 } 787 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 788 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 789 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 790 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 791 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 792 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 793 ierr = ISDestroy(&list1);CHKERRQ(ierr); 794 ierr = ISDestroy(&list2);CHKERRQ(ierr); 795 ierr = PetscFree(indx1);CHKERRQ(ierr); 796 ierr = PetscFree(indx2);CHKERRQ(ierr); 797 #endif 798 break; 799 } 800 } 801 PetscFunctionReturn(0); 802 } 803 EXTERN_C_END 804 805 #undef __FUNCT__ 806 #define __FUNCT__ "VecScatterFFTWToPetsc" 807 /*@ 808 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 809 810 Collective on Mat 811 812 Input Parameters: 813 + A - FFTW matrix 814 - x - FFTW vector 815 816 Output Parameters: 817 . y - PETSc vector 818 819 Level: intermediate 820 821 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 822 VecScatterFFTWToPetsc removes those extra zeros. 823 824 .seealso: VecScatterPetscToFFTW() 825 @*/ 826 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 827 { 828 PetscErrorCode ierr; 829 PetscFunctionBegin; 830 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 831 PetscFunctionReturn(0); 832 } 833 834 EXTERN_C_BEGIN 835 #undef __FUNCT__ 836 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 837 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 838 { 839 PetscErrorCode ierr; 840 MPI_Comm comm=((PetscObject)A)->comm; 841 Mat_FFT *fft = (Mat_FFT*)A->data; 842 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 843 PetscInt N=fft->N; 844 PetscInt ndim=fft->ndim,*dim=fft->dim; 845 PetscInt low; 846 PetscInt rank,size; 847 ptrdiff_t alloc_local,local_n0,local_0_start; 848 ptrdiff_t local_n1,local_1_start; 849 VecScatter vecscat; 850 IS list1,list2; 851 #if !defined(PETSC_USE_COMPLEX) 852 PetscInt i,j,k,partial_dim; 853 PetscInt *indx1, *indx2, tempindx, tempindx1; 854 PetscInt N1, n1 ,NM; 855 ptrdiff_t temp; 856 #endif 857 858 PetscFunctionBegin; 859 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 860 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 861 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 862 863 if (size==1){ 864 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 865 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 866 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 867 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 868 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 869 ierr = ISDestroy(&list1);CHKERRQ(ierr); 870 871 } else{ 872 switch (ndim){ 873 case 1: 874 #if defined(PETSC_USE_COMPLEX) 875 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 876 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 877 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 878 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 879 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 881 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 882 ierr = ISDestroy(&list1);CHKERRQ(ierr); 883 ierr = ISDestroy(&list2);CHKERRQ(ierr); 884 #else 885 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 886 #endif 887 break; 888 case 2: 889 #if defined(PETSC_USE_COMPLEX) 890 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 891 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 892 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 893 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 894 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 897 ierr = ISDestroy(&list1);CHKERRQ(ierr); 898 ierr = ISDestroy(&list2);CHKERRQ(ierr); 899 #else 900 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 901 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 902 903 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 904 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 905 906 if (dim[1]%2==0) 907 NM = dim[1]+2; 908 else 909 NM = dim[1]+1; 910 911 for (i=0;i<local_n0;i++){ 912 for (j=0;j<dim[1];j++){ 913 tempindx = i*dim[1] + j; 914 tempindx1 = i*NM + j; 915 indx1[tempindx]=local_0_start*dim[1]+tempindx; 916 indx2[tempindx]=low+tempindx1; 917 } 918 } 919 920 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 921 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 922 923 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 924 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 925 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 926 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 927 ierr = ISDestroy(&list1);CHKERRQ(ierr); 928 ierr = ISDestroy(&list2);CHKERRQ(ierr); 929 ierr = PetscFree(indx1);CHKERRQ(ierr); 930 ierr = PetscFree(indx2);CHKERRQ(ierr); 931 #endif 932 break; 933 case 3: 934 #if defined(PETSC_USE_COMPLEX) 935 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 936 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 937 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 938 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 939 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 941 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 942 ierr = ISDestroy(&list1);CHKERRQ(ierr); 943 ierr = ISDestroy(&list2);CHKERRQ(ierr); 944 #else 945 946 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 947 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 948 949 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 950 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 951 952 if (dim[2]%2==0) 953 NM = dim[2]+2; 954 else 955 NM = dim[2]+1; 956 957 for (i=0;i<local_n0;i++){ 958 for (j=0;j<dim[1];j++){ 959 for (k=0;k<dim[2];k++){ 960 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 961 tempindx1 = i*dim[1]*NM + j*NM + k; 962 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 963 indx2[tempindx]=low+tempindx1; 964 } 965 } 966 } 967 968 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 969 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 970 971 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 972 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 973 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 974 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 975 ierr = ISDestroy(&list1);CHKERRQ(ierr); 976 ierr = ISDestroy(&list2);CHKERRQ(ierr); 977 ierr = PetscFree(indx1);CHKERRQ(ierr); 978 ierr = PetscFree(indx2);CHKERRQ(ierr); 979 #endif 980 break; 981 default: 982 #if defined(PETSC_USE_COMPLEX) 983 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 984 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 985 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 986 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 987 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 988 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 989 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 990 ierr = ISDestroy(&list1);CHKERRQ(ierr); 991 ierr = ISDestroy(&list2);CHKERRQ(ierr); 992 #else 993 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 994 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 995 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 996 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 997 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 998 999 partial_dim = fftw->partial_dim; 1000 1001 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1002 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1003 1004 if (dim[ndim-1]%2==0) 1005 NM = 2; 1006 else 1007 NM = 1; 1008 1009 j = low; 1010 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++){ 1011 indx1[i] = local_0_start*partial_dim + i; 1012 indx2[i] = j; 1013 if (k%dim[ndim-1]==0)j+=NM; 1014 j++; 1015 } 1016 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1017 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1018 1019 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1020 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1021 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1022 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1023 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1024 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1025 ierr = PetscFree(indx1);CHKERRQ(ierr); 1026 ierr = PetscFree(indx2);CHKERRQ(ierr); 1027 #endif 1028 break; 1029 } 1030 } 1031 PetscFunctionReturn(0); 1032 } 1033 EXTERN_C_END 1034 1035 EXTERN_C_BEGIN 1036 #undef __FUNCT__ 1037 #define __FUNCT__ "MatCreate_FFTW" 1038 /* 1039 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1040 1041 Options Database Keys: 1042 + -mat_fftw_plannerflags - set FFTW planner flags 1043 1044 Level: intermediate 1045 1046 */ 1047 PetscErrorCode MatCreate_FFTW(Mat A) 1048 { 1049 PetscErrorCode ierr; 1050 MPI_Comm comm=((PetscObject)A)->comm; 1051 Mat_FFT *fft=(Mat_FFT*)A->data; 1052 Mat_FFTW *fftw; 1053 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1054 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1055 PetscBool flg; 1056 PetscInt p_flag,partial_dim=1,ctr; 1057 PetscMPIInt size,rank; 1058 ptrdiff_t *pdim; 1059 ptrdiff_t local_n1,local_1_start; 1060 #if !defined(PETSC_USE_COMPLEX) 1061 ptrdiff_t temp; 1062 PetscInt N1,tot_dim; 1063 #else 1064 PetscInt n1; 1065 #endif 1066 1067 PetscFunctionBegin; 1068 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1069 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1070 1071 fftw_mpi_init(); 1072 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1073 pdim[0] = dim[0]; 1074 #if !defined(PETSC_USE_COMPLEX) 1075 tot_dim = 2*dim[0]; 1076 #endif 1077 for (ctr=1;ctr<ndim;ctr++){ 1078 partial_dim *= dim[ctr]; 1079 pdim[ctr] = dim[ctr]; 1080 #if !defined(PETSC_USE_COMPLEX) 1081 if (ctr==ndim-1) 1082 tot_dim *= (dim[ctr]/2+1); 1083 else 1084 tot_dim *= dim[ctr]; 1085 #endif 1086 } 1087 1088 if (size == 1) { 1089 #if defined(PETSC_USE_COMPLEX) 1090 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1091 n = N; 1092 #else 1093 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1094 n = tot_dim; 1095 #endif 1096 1097 } else { 1098 ptrdiff_t alloc_local,local_n0,local_0_start; 1099 switch (ndim){ 1100 case 1: 1101 #if !defined(PETSC_USE_COMPLEX) 1102 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1103 #else 1104 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1105 n = (PetscInt)local_n0; 1106 n1 = (PetscInt) local_n1; 1107 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1108 #endif 1109 break; 1110 case 2: 1111 #if defined(PETSC_USE_COMPLEX) 1112 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1113 /* 1114 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1115 PetscSynchronizedFlush(comm); 1116 */ 1117 n = (PetscInt)local_n0*dim[1]; 1118 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1119 #else 1120 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1121 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1122 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1123 #endif 1124 break; 1125 case 3: 1126 #if defined(PETSC_USE_COMPLEX) 1127 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1128 n = (PetscInt)local_n0*dim[1]*dim[2]; 1129 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1130 #else 1131 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1132 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1133 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1134 #endif 1135 break; 1136 default: 1137 #if defined(PETSC_USE_COMPLEX) 1138 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1139 n = (PetscInt)local_n0*partial_dim; 1140 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1141 #else 1142 temp = pdim[ndim-1]; 1143 pdim[ndim-1] = temp/2 + 1; 1144 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1145 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1146 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1147 pdim[ndim-1] = temp; 1148 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1149 #endif 1150 break; 1151 } 1152 } 1153 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1154 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1155 fft->data = (void*)fftw; 1156 1157 fft->n = n; 1158 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1159 fftw->partial_dim = partial_dim; 1160 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1161 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1162 1163 fftw->p_forward = 0; 1164 fftw->p_backward = 0; 1165 fftw->p_flag = FFTW_ESTIMATE; 1166 1167 if (size == 1){ 1168 A->ops->mult = MatMult_SeqFFTW; 1169 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1170 } else { 1171 A->ops->mult = MatMult_MPIFFTW; 1172 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1173 } 1174 fft->matdestroy = MatDestroy_FFTW; 1175 A->assembled = PETSC_TRUE; 1176 PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 1177 PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW); 1178 PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW); 1179 1180 /* get runtime options */ 1181 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1182 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1183 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1184 PetscOptionsEnd(); 1185 PetscFunctionReturn(0); 1186 } 1187 EXTERN_C_END 1188 1189 1190 1191 1192