1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 28 /* MatMult_SeqFFTW performs forward DFT in parallel 29 Input parameter: 30 A - the matrix 31 x - the vector on which FDFT will be performed 32 33 Output parameter: 34 y - vector that stores result of FDFT 35 */ 36 #undef __FUNCT__ 37 #define __FUNCT__ "MatMult_SeqFFTW" 38 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 39 { 40 PetscErrorCode ierr; 41 Mat_FFT *fft = (Mat_FFT*)A->data; 42 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 43 PetscScalar *x_array,*y_array; 44 PetscInt ndim=fft->ndim,*dim=fft->dim; 45 46 PetscFunctionBegin; 47 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 48 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 49 if (!fftw->p_forward) { /* create a plan, then excute it */ 50 switch (ndim) { 51 case 1: 52 #if defined(PETSC_USE_COMPLEX) 53 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 #else 55 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 56 #endif 57 break; 58 case 2: 59 #if defined(PETSC_USE_COMPLEX) 60 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 61 #else 62 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 63 #endif 64 break; 65 case 3: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 default: 73 #if defined(PETSC_USE_COMPLEX) 74 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 75 #else 76 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 77 #endif 78 break; 79 } 80 fftw->finarray = x_array; 81 fftw->foutarray = y_array; 82 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 83 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 84 fftw_execute(fftw->p_forward); 85 } else { /* use existing plan */ 86 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 87 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 88 } else { 89 fftw_execute(fftw->p_forward); 90 } 91 } 92 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 93 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 94 PetscFunctionReturn(0); 95 } 96 97 /* MatMultTranspose_SeqFFTW performs serial backward DFT 98 Input parameter: 99 A - the matrix 100 x - the vector on which BDFT will be performed 101 102 Output parameter: 103 y - vector that stores result of BDFT 104 */ 105 106 #undef __FUNCT__ 107 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 108 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 109 { 110 PetscErrorCode ierr; 111 Mat_FFT *fft = (Mat_FFT*)A->data; 112 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 113 PetscScalar *x_array,*y_array; 114 PetscInt ndim=fft->ndim,*dim=fft->dim; 115 116 PetscFunctionBegin; 117 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 118 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 119 if (!fftw->p_backward) { /* create a plan, then excute it */ 120 switch (ndim) { 121 case 1: 122 #if defined(PETSC_USE_COMPLEX) 123 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 124 #else 125 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 126 #endif 127 break; 128 case 2: 129 #if defined(PETSC_USE_COMPLEX) 130 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 131 #else 132 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 133 #endif 134 break; 135 case 3: 136 #if defined(PETSC_USE_COMPLEX) 137 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 138 #else 139 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 140 #endif 141 break; 142 default: 143 #if defined(PETSC_USE_COMPLEX) 144 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 145 #else 146 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 147 #endif 148 break; 149 } 150 fftw->binarray = x_array; 151 fftw->boutarray = y_array; 152 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 153 } else { /* use existing plan */ 154 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 155 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 156 } else { 157 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 158 } 159 } 160 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 161 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 /* MatMult_MPIFFTW performs forward DFT in parallel 166 Input parameter: 167 A - the matrix 168 x - the vector on which FDFT will be performed 169 170 Output parameter: 171 y - vector that stores result of FDFT 172 */ 173 #undef __FUNCT__ 174 #define __FUNCT__ "MatMult_MPIFFTW" 175 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 176 { 177 PetscErrorCode ierr; 178 Mat_FFT *fft = (Mat_FFT*)A->data; 179 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 180 PetscScalar *x_array,*y_array; 181 PetscInt ndim=fft->ndim,*dim=fft->dim; 182 MPI_Comm comm=((PetscObject)A)->comm; 183 184 PetscFunctionBegin; 185 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 186 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 187 if (!fftw->p_forward) { /* create a plan, then excute it */ 188 switch (ndim) { 189 case 1: 190 #if defined(PETSC_USE_COMPLEX) 191 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 192 #else 193 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 194 #endif 195 break; 196 case 2: 197 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft, for real transforms call fftw_mpi_plan_dft_r2c */ 198 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 199 #else 200 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 201 #endif 202 break; 203 case 3: 204 #if defined(PETSC_USE_COMPLEX) 205 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 206 #else 207 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 208 #endif 209 break; 210 default: 211 #if defined(PETSC_USE_COMPLEX) 212 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 213 #else 214 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 215 #endif 216 break; 217 } 218 fftw->finarray = x_array; 219 fftw->foutarray = y_array; 220 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 221 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 222 fftw_execute(fftw->p_forward); 223 } else { /* use existing plan */ 224 if (fftw->finarray != x_array || fftw->foutarray != y_array) { /* use existing plan on new arrays */ 225 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 226 } else { 227 fftw_execute(fftw->p_forward); 228 } 229 } 230 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 231 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 232 PetscFunctionReturn(0); 233 } 234 235 /* MatMultTranspose_MPIFFTW performs parallel backward DFT 236 Input parameter: 237 A - the matrix 238 x - the vector on which BDFT will be performed 239 240 Output parameter: 241 y - vector that stores result of BDFT 242 */ 243 #undef __FUNCT__ 244 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 245 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 246 { 247 PetscErrorCode ierr; 248 Mat_FFT *fft = (Mat_FFT*)A->data; 249 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 250 PetscScalar *x_array,*y_array; 251 PetscInt ndim=fft->ndim,*dim=fft->dim; 252 MPI_Comm comm=((PetscObject)A)->comm; 253 254 PetscFunctionBegin; 255 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 256 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 257 if (!fftw->p_backward) { /* create a plan, then excute it */ 258 switch (ndim) { 259 case 1: 260 #if defined(PETSC_USE_COMPLEX) 261 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 262 #else 263 SETERRQ(comm,PETSC_ERR_SUP,"not support for real numbers yet"); 264 #endif 265 break; 266 case 2: 267 #if defined(PETSC_USE_COMPLEX) /* For complex transforms call fftw_mpi_plan_dft with flag FFTW_BACKWARD, for real transforms call fftw_mpi_plan_dft_c2r */ 268 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 269 #else 270 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 271 #endif 272 break; 273 case 3: 274 #if defined(PETSC_USE_COMPLEX) 275 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 276 #else 277 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 278 #endif 279 break; 280 default: 281 #if defined(PETSC_USE_COMPLEX) 282 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 283 #else 284 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 285 #endif 286 break; 287 } 288 fftw->binarray = x_array; 289 fftw->boutarray = y_array; 290 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 291 } else { /* use existing plan */ 292 if (fftw->binarray != x_array || fftw->boutarray != y_array) { /* use existing plan on new arrays */ 293 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 294 } else { 295 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 296 } 297 } 298 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 299 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatDestroy_FFTW" 305 PetscErrorCode MatDestroy_FFTW(Mat A) 306 { 307 Mat_FFT *fft = (Mat_FFT*)A->data; 308 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 309 PetscErrorCode ierr; 310 311 PetscFunctionBegin; 312 fftw_destroy_plan(fftw->p_forward); 313 fftw_destroy_plan(fftw->p_backward); 314 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 315 ierr = PetscFree(fft->data);CHKERRQ(ierr); 316 fftw_mpi_cleanup(); 317 PetscFunctionReturn(0); 318 } 319 320 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 321 #undef __FUNCT__ 322 #define __FUNCT__ "VecDestroy_MPIFFTW" 323 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 324 { 325 PetscErrorCode ierr; 326 PetscScalar *array; 327 328 PetscFunctionBegin; 329 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 330 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 331 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 332 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 333 PetscFunctionReturn(0); 334 } 335 336 #undef __FUNCT__ 337 #define __FUNCT__ "MatGetVecsFFTW" 338 /*@ 339 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 340 parallel layout determined by FFTW 341 342 Collective on Mat 343 344 Input Parameter: 345 . A - the matrix 346 347 Output Parameter: 348 + x - (optional) input vector of forward FFTW 349 - y - (optional) output vector of forward FFTW 350 - z - (optional) output vector of backward FFTW 351 352 Level: advanced 353 354 Note: The parallel layout of output of forward FFTW is always same as the input 355 of the backward FFTW. But parallel layout of the input vector of forward 356 FFTW might not be same as the output of backward FFTW. 357 Also note that we need to provide enough space while doing parallel real transform. 358 We need to pad extra zeros at the end of last dimension. For this reason the one needs to 359 invoke the routine fftw_mpi_local_size_transposed routines. Remember one has to change the 360 last dimension from n to n/2+1 while invoking this routine. The number of zeros to be padded 361 depends on if the last dimension is even or odd. If the last dimension is even need to pad two 362 zeros if it is odd only one zero is needed. 363 Lastly one needs some scratch space at the end of data set in each process. alloc_local 364 figures out how much space is needed, i.e. it figures out the data+scratch space for 365 each processor and returns that. 366 367 .seealso: MatCreateFFTW() 368 @*/ 369 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 370 { 371 PetscErrorCode ierr; 372 373 PetscFunctionBegin; 374 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 375 PetscFunctionReturn(0); 376 } 377 378 EXTERN_C_BEGIN 379 #undef __FUNCT__ 380 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 381 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 382 { 383 PetscErrorCode ierr; 384 PetscMPIInt size,rank; 385 MPI_Comm comm=((PetscObject)A)->comm; 386 Mat_FFT *fft = (Mat_FFT*)A->data; 387 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 388 PetscInt N=fft->N; 389 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 390 391 PetscFunctionBegin; 392 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 393 PetscValidType(A,1); 394 395 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 396 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 397 if (size == 1) { /* sequential case */ 398 #if defined(PETSC_USE_COMPLEX) 399 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 400 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 401 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 402 #else 403 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fin);CHKERRQ(ierr);} 404 if (fout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,fout);CHKERRQ(ierr);} 405 if (bout) {ierr = VecCreateSeq(PETSC_COMM_SELF,n,bout);CHKERRQ(ierr);} 406 #endif 407 } else { /* parallel cases */ 408 ptrdiff_t alloc_local,local_n0,local_0_start; 409 ptrdiff_t local_n1; 410 fftw_complex *data_fout; 411 ptrdiff_t local_1_start; 412 #if defined(PETSC_USE_COMPLEX) 413 fftw_complex *data_fin,*data_bout; 414 #else 415 double *data_finr,*data_boutr; 416 PetscInt n1,N1; 417 ptrdiff_t temp; 418 #endif 419 420 switch (ndim) { 421 case 1: 422 #if !defined(PETSC_USE_COMPLEX) 423 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 424 #else 425 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 426 if (fin) { 427 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 428 ierr = VecCreateMPIWithArray(comm,1,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 429 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 430 } 431 if (fout) { 432 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 433 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 434 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 435 } 436 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 437 if (bout) { 438 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 439 ierr = VecCreateMPIWithArray(comm,1,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 440 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 441 } 442 break; 443 #endif 444 case 2: 445 #if !defined(PETSC_USE_COMPLEX) /* Note that N1 is no more the product of individual dimensions */ 446 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 447 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 448 if (fin) { 449 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 450 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 451 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 452 } 453 if (bout) { 454 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 455 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 456 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 457 } 458 if (fout) { 459 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 460 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 461 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 462 } 463 #else 464 /* Get local size */ 465 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 466 if (fin) { 467 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 468 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 469 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 470 } 471 if (fout) { 472 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 473 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 474 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 475 } 476 if (bout) { 477 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 478 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 479 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 480 } 481 #endif 482 break; 483 case 3: 484 #if !defined(PETSC_USE_COMPLEX) 485 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 486 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 487 if (fin) { 488 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 489 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 490 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 491 } 492 if (bout) { 493 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 494 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 495 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 496 } 497 498 if (fout) { 499 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 500 ierr = VecCreateMPIWithArray(comm,1,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 501 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 502 } 503 #else 504 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 505 if (fin) { 506 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 507 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 508 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 509 } 510 if (fout) { 511 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 512 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 513 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 514 } 515 if (bout) { 516 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 517 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 518 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 #endif 521 break; 522 default: 523 #if !defined(PETSC_USE_COMPLEX) 524 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 525 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 526 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 527 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 528 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 529 530 if (fin) { 531 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 532 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 533 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 534 } 535 if (bout) { 536 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 537 ierr = VecCreateMPIWithArray(comm,1,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 538 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 539 } 540 541 if (fout) { 542 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 543 ierr = VecCreateMPIWithArray(comm,1,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 544 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 545 } 546 #else 547 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 548 if (fin) { 549 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 550 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 551 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 552 } 553 if (fout) { 554 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 555 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 556 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 557 } 558 if (bout) { 559 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 560 ierr = VecCreateMPIWithArray(comm,1,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 561 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 562 } 563 #endif 564 break; 565 } 566 } 567 PetscFunctionReturn(0); 568 } 569 EXTERN_C_END 570 571 #undef __FUNCT__ 572 #define __FUNCT__ "VecScatterPetscToFFTW" 573 /*@ 574 VecScatterPetscToFFTW - Copies the PETSc vector to the vector that goes into FFTW block. 575 576 Collective on Mat 577 578 Input Parameters: 579 + A - FFTW matrix 580 - x - the PETSc vector 581 582 Output Parameters: 583 . y - the FFTW vector 584 585 Options Database Keys: 586 . -mat_fftw_plannerflags - set FFTW planner flags 587 588 Level: intermediate 589 590 Note: For real parallel FFT, FFTW requires insertion of extra space at the end of last dimension. This required even when 591 one is not doing in-place transform. The last dimension size must be changed to 2*(dim[last]/2+1) to accommodate these extra 592 zeros. This routine does that job by scattering operation. 593 594 .seealso: VecScatterFFTWToPetsc() 595 @*/ 596 PetscErrorCode VecScatterPetscToFFTW(Mat A,Vec x,Vec y) 597 { 598 PetscErrorCode ierr; 599 600 PetscFunctionBegin; 601 ierr = PetscTryMethod(A,"VecScatterPetscToFFTW_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 602 PetscFunctionReturn(0); 603 } 604 605 EXTERN_C_BEGIN 606 #undef __FUNCT__ 607 #define __FUNCT__ "VecScatterPetscToFFTW_FFTW" 608 PetscErrorCode VecScatterPetscToFFTW_FFTW(Mat A,Vec x,Vec y) 609 { 610 PetscErrorCode ierr; 611 MPI_Comm comm=((PetscObject)A)->comm; 612 Mat_FFT *fft = (Mat_FFT*)A->data; 613 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 614 PetscInt N=fft->N; 615 PetscInt ndim=fft->ndim,*dim=fft->dim; 616 PetscInt low; 617 PetscInt rank,size,vsize,vsize1; 618 ptrdiff_t alloc_local,local_n0,local_0_start; 619 ptrdiff_t local_n1,local_1_start; 620 VecScatter vecscat; 621 IS list1,list2; 622 #if !defined(PETSC_USE_COMPLEX) 623 PetscInt i,j,k,partial_dim; 624 PetscInt *indx1, *indx2, tempindx, tempindx1; 625 PetscInt N1, n1 ,NM; 626 ptrdiff_t temp; 627 #endif 628 629 PetscFunctionBegin; 630 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 631 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 632 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 633 634 if (size==1) { 635 ierr = VecGetSize(x,&vsize);CHKERRQ(ierr); 636 ierr = VecGetSize(y,&vsize1);CHKERRQ(ierr); 637 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 638 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 639 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 640 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 641 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 642 ierr = ISDestroy(&list1);CHKERRQ(ierr); 643 } else { 644 switch (ndim) { 645 case 1: 646 #if defined(PETSC_USE_COMPLEX) 647 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 648 ierr = ISCreateStride(comm,local_n0,local_0_start,1,&list1); 649 ierr = ISCreateStride(comm,local_n0,low,1,&list2); 650 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 651 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 652 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 653 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 654 ierr = ISDestroy(&list1);CHKERRQ(ierr); 655 ierr = ISDestroy(&list2);CHKERRQ(ierr); 656 #else 657 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 658 #endif 659 break; 660 case 2: 661 #if defined(PETSC_USE_COMPLEX) 662 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 663 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 664 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 665 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 666 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 667 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 668 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 669 ierr = ISDestroy(&list1);CHKERRQ(ierr); 670 ierr = ISDestroy(&list2);CHKERRQ(ierr); 671 #else 672 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 673 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 674 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 675 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 676 677 if (dim[1]%2==0) { 678 NM = dim[1]+2; 679 } else { 680 NM = dim[1]+1; 681 } 682 for (i=0;i<local_n0;i++) { 683 for (j=0;j<dim[1];j++) { 684 tempindx = i*dim[1] + j; 685 tempindx1 = i*NM + j; 686 indx1[tempindx]=local_0_start*dim[1]+tempindx; 687 indx2[tempindx]=low+tempindx1; 688 } 689 } 690 691 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 692 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 693 694 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 695 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 696 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 697 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 698 ierr = ISDestroy(&list1);CHKERRQ(ierr); 699 ierr = ISDestroy(&list2);CHKERRQ(ierr); 700 ierr = PetscFree(indx1);CHKERRQ(ierr); 701 ierr = PetscFree(indx2);CHKERRQ(ierr); 702 #endif 703 break; 704 705 case 3: 706 #if defined(PETSC_USE_COMPLEX) 707 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 708 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 709 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 710 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 711 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 712 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 713 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 714 ierr = ISDestroy(&list1);CHKERRQ(ierr); 715 ierr = ISDestroy(&list2);CHKERRQ(ierr); 716 #else 717 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 718 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 719 720 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 721 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 722 723 if (dim[2]%2==0) NM = dim[2]+2; 724 else NM = dim[2]+1; 725 726 for (i=0;i<local_n0;i++) { 727 for (j=0;j<dim[1];j++) { 728 for (k=0;k<dim[2];k++) { 729 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 730 tempindx1 = i*dim[1]*NM + j*NM + k; 731 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 732 indx2[tempindx]=low+tempindx1; 733 } 734 } 735 } 736 737 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 738 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 739 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 740 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 741 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 742 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 743 ierr = ISDestroy(&list1);CHKERRQ(ierr); 744 ierr = ISDestroy(&list2);CHKERRQ(ierr); 745 ierr = PetscFree(indx1);CHKERRQ(ierr); 746 ierr = PetscFree(indx2);CHKERRQ(ierr); 747 #endif 748 break; 749 750 default: 751 #if defined(PETSC_USE_COMPLEX) 752 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 753 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 754 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 755 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 756 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 757 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 758 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 759 ierr = ISDestroy(&list1);CHKERRQ(ierr); 760 ierr = ISDestroy(&list2);CHKERRQ(ierr); 761 #else 762 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 763 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 764 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 765 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 766 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 767 768 partial_dim = fftw->partial_dim; 769 770 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 771 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 772 773 if (dim[ndim-1]%2==0) NM = 2; 774 else NM = 1; 775 776 j = low; 777 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 778 indx1[i] = local_0_start*partial_dim + i; 779 indx2[i] = j; 780 if (k%dim[ndim-1]==0) { j+=NM;} 781 j++; 782 } 783 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 784 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 785 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 786 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 787 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 788 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 789 ierr = ISDestroy(&list1);CHKERRQ(ierr); 790 ierr = ISDestroy(&list2);CHKERRQ(ierr); 791 ierr = PetscFree(indx1);CHKERRQ(ierr); 792 ierr = PetscFree(indx2);CHKERRQ(ierr); 793 #endif 794 break; 795 } 796 } 797 PetscFunctionReturn(0); 798 } 799 EXTERN_C_END 800 801 #undef __FUNCT__ 802 #define __FUNCT__ "VecScatterFFTWToPetsc" 803 /*@ 804 VecScatterFFTWToPetsc - Converts FFTW output to the PETSc vector. 805 806 Collective on Mat 807 808 Input Parameters: 809 + A - FFTW matrix 810 - x - FFTW vector 811 812 Output Parameters: 813 . y - PETSc vector 814 815 Level: intermediate 816 817 Note: While doing real transform the FFTW output of backward DFT contains extra zeros at the end of last dimension. 818 VecScatterFFTWToPetsc removes those extra zeros. 819 820 .seealso: VecScatterPetscToFFTW() 821 @*/ 822 PetscErrorCode VecScatterFFTWToPetsc(Mat A,Vec x,Vec y) 823 { 824 PetscErrorCode ierr; 825 826 PetscFunctionBegin; 827 ierr = PetscTryMethod(A,"VecScatterFFTWToPetsc_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 828 PetscFunctionReturn(0); 829 } 830 831 EXTERN_C_BEGIN 832 #undef __FUNCT__ 833 #define __FUNCT__ "VecScatterFFTWToPetsc_FFTW" 834 PetscErrorCode VecScatterFFTWToPetsc_FFTW(Mat A,Vec x,Vec y) 835 { 836 PetscErrorCode ierr; 837 MPI_Comm comm=((PetscObject)A)->comm; 838 Mat_FFT *fft = (Mat_FFT*)A->data; 839 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 840 PetscInt N=fft->N; 841 PetscInt ndim=fft->ndim,*dim=fft->dim; 842 PetscInt low; 843 PetscInt rank,size; 844 ptrdiff_t alloc_local,local_n0,local_0_start; 845 ptrdiff_t local_n1,local_1_start; 846 VecScatter vecscat; 847 IS list1,list2; 848 #if !defined(PETSC_USE_COMPLEX) 849 PetscInt i,j,k,partial_dim; 850 PetscInt *indx1, *indx2, tempindx, tempindx1; 851 PetscInt N1, n1 ,NM; 852 ptrdiff_t temp; 853 #endif 854 855 PetscFunctionBegin; 856 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 857 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 858 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 859 860 if (size==1) { 861 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 862 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 863 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 864 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 865 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 866 ierr = ISDestroy(&list1);CHKERRQ(ierr); 867 868 } else { 869 switch (ndim) { 870 case 1: 871 #if defined(PETSC_USE_COMPLEX) 872 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 873 ierr = ISCreateStride(comm,local_n1,local_1_start,1,&list1); 874 ierr = ISCreateStride(comm,local_n1,low,1,&list2); 875 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 876 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 877 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 878 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 879 ierr = ISDestroy(&list1);CHKERRQ(ierr); 880 ierr = ISDestroy(&list2);CHKERRQ(ierr); 881 #else 882 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 883 #endif 884 break; 885 case 2: 886 #if defined(PETSC_USE_COMPLEX) 887 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 888 ierr = ISCreateStride(comm,local_n0*dim[1],local_0_start*dim[1],1,&list1); 889 ierr = ISCreateStride(comm,local_n0*dim[1],low,1,&list2); 890 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 891 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 892 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 893 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 894 ierr = ISDestroy(&list1);CHKERRQ(ierr); 895 ierr = ISDestroy(&list2);CHKERRQ(ierr); 896 #else 897 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 898 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 899 900 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 901 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 902 903 if (dim[1]%2==0) NM = dim[1]+2; 904 else NM = dim[1]+1; 905 906 for (i=0;i<local_n0;i++) { 907 for (j=0;j<dim[1];j++) { 908 tempindx = i*dim[1] + j; 909 tempindx1 = i*NM + j; 910 indx1[tempindx]=local_0_start*dim[1]+tempindx; 911 indx2[tempindx]=low+tempindx1; 912 } 913 } 914 915 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 916 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 917 918 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 919 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 920 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 921 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 922 ierr = ISDestroy(&list1);CHKERRQ(ierr); 923 ierr = ISDestroy(&list2);CHKERRQ(ierr); 924 ierr = PetscFree(indx1);CHKERRQ(ierr); 925 ierr = PetscFree(indx2);CHKERRQ(ierr); 926 #endif 927 break; 928 case 3: 929 #if defined(PETSC_USE_COMPLEX) 930 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 931 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 932 ierr = ISCreateStride(comm,local_n0*dim[1]*dim[2],low,1,&list2); 933 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 934 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 935 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 936 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 937 ierr = ISDestroy(&list1);CHKERRQ(ierr); 938 ierr = ISDestroy(&list2);CHKERRQ(ierr); 939 #else 940 941 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 942 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 943 944 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 945 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 946 947 if (dim[2]%2==0) NM = dim[2]+2; 948 else NM = dim[2]+1; 949 950 for (i=0;i<local_n0;i++) { 951 for (j=0;j<dim[1];j++) { 952 for (k=0;k<dim[2];k++) { 953 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 954 tempindx1 = i*dim[1]*NM + j*NM + k; 955 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 956 indx2[tempindx]=low+tempindx1; 957 } 958 } 959 } 960 961 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 962 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 963 964 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 965 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 966 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 967 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 968 ierr = ISDestroy(&list1);CHKERRQ(ierr); 969 ierr = ISDestroy(&list2);CHKERRQ(ierr); 970 ierr = PetscFree(indx1);CHKERRQ(ierr); 971 ierr = PetscFree(indx2);CHKERRQ(ierr); 972 #endif 973 break; 974 default: 975 #if defined(PETSC_USE_COMPLEX) 976 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 977 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 978 ierr = ISCreateStride(comm,local_n0*(fftw->partial_dim),low,1,&list2); 979 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 980 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 981 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 982 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 983 ierr = ISDestroy(&list1);CHKERRQ(ierr); 984 ierr = ISDestroy(&list2);CHKERRQ(ierr); 985 #else 986 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 987 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 988 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 989 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 990 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 991 992 partial_dim = fftw->partial_dim; 993 994 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 995 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 996 997 if (dim[ndim-1]%2==0) NM = 2; 998 else NM = 1; 999 1000 j = low; 1001 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) { 1002 indx1[i] = local_0_start*partial_dim + i; 1003 indx2[i] = j; 1004 if (k%dim[ndim-1]==0)j+=NM; 1005 j++; 1006 } 1007 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1008 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1009 1010 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1011 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1012 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1013 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1014 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1015 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1016 ierr = PetscFree(indx1);CHKERRQ(ierr); 1017 ierr = PetscFree(indx2);CHKERRQ(ierr); 1018 #endif 1019 break; 1020 } 1021 } 1022 PetscFunctionReturn(0); 1023 } 1024 EXTERN_C_END 1025 1026 EXTERN_C_BEGIN 1027 #undef __FUNCT__ 1028 #define __FUNCT__ "MatCreate_FFTW" 1029 /* 1030 MatCreate_FFTW - Creates a matrix object that provides FFT via the external package FFTW 1031 1032 Options Database Keys: 1033 + -mat_fftw_plannerflags - set FFTW planner flags 1034 1035 Level: intermediate 1036 1037 */ 1038 PetscErrorCode MatCreate_FFTW(Mat A) 1039 { 1040 PetscErrorCode ierr; 1041 MPI_Comm comm=((PetscObject)A)->comm; 1042 Mat_FFT *fft=(Mat_FFT*)A->data; 1043 Mat_FFTW *fftw; 1044 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1045 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1046 PetscBool flg; 1047 PetscInt p_flag,partial_dim=1,ctr; 1048 PetscMPIInt size,rank; 1049 ptrdiff_t *pdim; 1050 ptrdiff_t local_n1,local_1_start; 1051 #if !defined(PETSC_USE_COMPLEX) 1052 ptrdiff_t temp; 1053 PetscInt N1,tot_dim; 1054 #else 1055 PetscInt n1; 1056 #endif 1057 1058 PetscFunctionBegin; 1059 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1060 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1061 1062 fftw_mpi_init(); 1063 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1064 pdim[0] = dim[0]; 1065 #if !defined(PETSC_USE_COMPLEX) 1066 tot_dim = 2*dim[0]; 1067 #endif 1068 for (ctr=1;ctr<ndim;ctr++) { 1069 partial_dim *= dim[ctr]; 1070 pdim[ctr] = dim[ctr]; 1071 #if !defined(PETSC_USE_COMPLEX) 1072 if (ctr==ndim-1) tot_dim *= (dim[ctr]/2+1); 1073 else tot_dim *= dim[ctr]; 1074 #endif 1075 } 1076 1077 if (size == 1) { 1078 #if defined(PETSC_USE_COMPLEX) 1079 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1080 n = N; 1081 #else 1082 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1083 n = tot_dim; 1084 #endif 1085 1086 } else { 1087 ptrdiff_t alloc_local,local_n0,local_0_start; 1088 switch (ndim) { 1089 case 1: 1090 #if !defined(PETSC_USE_COMPLEX) 1091 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1092 #else 1093 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1094 n = (PetscInt)local_n0; 1095 n1 = (PetscInt) local_n1; 1096 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1097 #endif 1098 break; 1099 case 2: 1100 #if defined(PETSC_USE_COMPLEX) 1101 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1102 /* 1103 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1104 PetscSynchronizedFlush(comm); 1105 */ 1106 n = (PetscInt)local_n0*dim[1]; 1107 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1108 #else 1109 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1110 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1111 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1112 #endif 1113 break; 1114 case 3: 1115 #if defined(PETSC_USE_COMPLEX) 1116 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1117 n = (PetscInt)local_n0*dim[1]*dim[2]; 1118 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1119 #else 1120 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1121 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1122 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1123 #endif 1124 break; 1125 default: 1126 #if defined(PETSC_USE_COMPLEX) 1127 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1128 n = (PetscInt)local_n0*partial_dim; 1129 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1130 #else 1131 temp = pdim[ndim-1]; 1132 pdim[ndim-1] = temp/2 + 1; 1133 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1134 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1135 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1136 pdim[ndim-1] = temp; 1137 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1138 #endif 1139 break; 1140 } 1141 } 1142 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1143 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1144 fft->data = (void*)fftw; 1145 1146 fft->n = n; 1147 fftw->ndim_fftw = (ptrdiff_t)ndim; /* This is dimension of fft */ 1148 fftw->partial_dim = partial_dim; 1149 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1150 for (ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1151 1152 fftw->p_forward = 0; 1153 fftw->p_backward = 0; 1154 fftw->p_flag = FFTW_ESTIMATE; 1155 1156 if (size == 1) { 1157 A->ops->mult = MatMult_SeqFFTW; 1158 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1159 } else { 1160 A->ops->mult = MatMult_MPIFFTW; 1161 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1162 } 1163 fft->matdestroy = MatDestroy_FFTW; 1164 A->assembled = PETSC_TRUE; 1165 A->preallocated = PETSC_TRUE; 1166 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW);CHKERRQ(ierr); 1167 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterPetscToFFTW_C","VecScatterPetscToFFTW_FFTW",VecScatterPetscToFFTW_FFTW);CHKERRQ(ierr); 1168 ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"VecScatterFFTWToPetsc_C","VecScatterFFTWToPetsc_FFTW",VecScatterFFTWToPetsc_FFTW);CHKERRQ(ierr); 1169 1170 /* get runtime options */ 1171 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1172 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1173 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1174 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1175 PetscFunctionReturn(0); 1176 } 1177 EXTERN_C_END 1178 1179 1180 1181 1182