1 2 /* 3 Provides an interface to the FFTW package. 4 Testing examples can be found in ~src/mat/examples/tests 5 */ 6 7 #include <../src/mat/impls/fft/fft.h> /*I "petscmat.h" I*/ 8 EXTERN_C_BEGIN 9 #include <fftw3-mpi.h> 10 EXTERN_C_END 11 12 typedef struct { 13 ptrdiff_t ndim_fftw,*dim_fftw; 14 PetscInt partial_dim; 15 fftw_plan p_forward,p_backward; 16 unsigned p_flag; /* planner flags, FFTW_ESTIMATE,FFTW_MEASURE, FFTW_PATIENT, FFTW_EXHAUSTIVE */ 17 PetscScalar *finarray,*foutarray,*binarray,*boutarray; /* keep track of arrays becaue fftw plan should be 18 executed for the arrays with which the plan was created */ 19 } Mat_FFTW; 20 21 extern PetscErrorCode MatMult_SeqFFTW(Mat,Vec,Vec); 22 extern PetscErrorCode MatMultTranspose_SeqFFTW(Mat,Vec,Vec); 23 extern PetscErrorCode MatMult_MPIFFTW(Mat,Vec,Vec); 24 extern PetscErrorCode MatMultTranspose_MPIFFTW(Mat,Vec,Vec); 25 extern PetscErrorCode MatDestroy_FFTW(Mat); 26 extern PetscErrorCode VecDestroy_MPIFFTW(Vec); 27 extern PetscErrorCode MatGetVecs_FFTW(Mat,Vec*,Vec*); // to be removed! 28 29 #undef __FUNCT__ 30 #define __FUNCT__ "MatMult_SeqFFTW" 31 PetscErrorCode MatMult_SeqFFTW(Mat A,Vec x,Vec y) 32 { 33 PetscErrorCode ierr; 34 Mat_FFT *fft = (Mat_FFT*)A->data; 35 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 36 PetscScalar *x_array,*y_array; 37 PetscInt ndim=fft->ndim,*dim=fft->dim; 38 39 PetscFunctionBegin; 40 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 41 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 42 if (!fftw->p_forward){ /* create a plan, then excute it */ 43 switch (ndim){ 44 case 1: 45 #if defined(PETSC_USE_COMPLEX) 46 fftw->p_forward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 47 #else 48 fftw->p_forward = fftw_plan_dft_r2c_1d(dim[0],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 49 #endif 50 break; 51 case 2: 52 #if defined(PETSC_USE_COMPLEX) 53 fftw->p_forward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 54 #else 55 fftw->p_forward = fftw_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 56 #endif 57 break; 58 case 3: 59 #if defined(PETSC_USE_COMPLEX) 60 fftw->p_forward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 61 #else 62 fftw->p_forward = fftw_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 63 #endif 64 break; 65 default: 66 #if defined(PETSC_USE_COMPLEX) 67 fftw->p_forward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_FORWARD,fftw->p_flag); 68 #else 69 fftw->p_forward = fftw_plan_dft_r2c(ndim,dim,(double *)x_array,(fftw_complex*)y_array,fftw->p_flag); 70 #endif 71 break; 72 } 73 fftw->finarray = x_array; 74 fftw->foutarray = y_array; 75 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 76 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 77 fftw_execute(fftw->p_forward); 78 } else { /* use existing plan */ 79 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 80 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 81 } else { 82 fftw_execute(fftw->p_forward); 83 } 84 } 85 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 86 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 87 PetscFunctionReturn(0); 88 } 89 90 #undef __FUNCT__ 91 #define __FUNCT__ "MatMultTranspose_SeqFFTW" 92 PetscErrorCode MatMultTranspose_SeqFFTW(Mat A,Vec x,Vec y) 93 { 94 PetscErrorCode ierr; 95 Mat_FFT *fft = (Mat_FFT*)A->data; 96 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 97 PetscScalar *x_array,*y_array; 98 PetscInt ndim=fft->ndim,*dim=fft->dim; 99 100 PetscFunctionBegin; 101 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 102 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 103 if (!fftw->p_backward){ /* create a plan, then excute it */ 104 switch (ndim){ 105 case 1: 106 #if defined(PETSC_USE_COMPLEX) 107 fftw->p_backward = fftw_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 108 #else 109 fftw->p_backward= fftw_plan_dft_c2r_1d(dim[0],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 110 #endif 111 break; 112 case 2: 113 #if defined(PETSC_USE_COMPLEX) 114 fftw->p_backward = fftw_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 115 #else 116 fftw->p_backward= fftw_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 117 #endif 118 break; 119 case 3: 120 #if defined(PETSC_USE_COMPLEX) 121 fftw->p_backward = fftw_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 122 #else 123 fftw->p_backward= fftw_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 124 #endif 125 break; 126 default: 127 #if defined(PETSC_USE_COMPLEX) 128 fftw->p_backward = fftw_plan_dft(ndim,dim,(fftw_complex*)x_array,(fftw_complex*)y_array,FFTW_BACKWARD,fftw->p_flag); 129 #else 130 fftw->p_backward= fftw_plan_dft_c2r(ndim,dim,(fftw_complex*)x_array,(double *)y_array,fftw->p_flag); 131 #endif 132 break; 133 } 134 fftw->binarray = x_array; 135 fftw->boutarray = y_array; 136 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 137 } else { /* use existing plan */ 138 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 139 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 140 } else { 141 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 142 } 143 } 144 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 145 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 146 PetscFunctionReturn(0); 147 } 148 149 #undef __FUNCT__ 150 #define __FUNCT__ "MatMult_MPIFFTW" 151 PetscErrorCode MatMult_MPIFFTW(Mat A,Vec x,Vec y) 152 { 153 PetscErrorCode ierr; 154 Mat_FFT *fft = (Mat_FFT*)A->data; 155 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 156 PetscScalar *x_array,*y_array; 157 PetscInt ndim=fft->ndim,*dim=fft->dim; 158 MPI_Comm comm=((PetscObject)A)->comm; 159 160 PetscFunctionBegin; 161 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 162 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 163 if (!fftw->p_forward){ /* create a plan, then excute it */ 164 switch (ndim){ 165 case 1: 166 #if defined(PETSC_USE_COMPLEX) 167 fftw->p_forward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 168 #else 169 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 170 #endif 171 break; 172 case 2: 173 #if defined(PETSC_USE_COMPLEX) 174 fftw->p_forward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 175 #else 176 fftw->p_forward = fftw_mpi_plan_dft_r2c_2d(dim[0],dim[1],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 177 #endif 178 break; 179 case 3: 180 #if defined(PETSC_USE_COMPLEX) 181 fftw->p_forward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 182 #else 183 fftw->p_forward = fftw_mpi_plan_dft_r2c_3d(dim[0],dim[1],dim[2],(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 184 #endif 185 break; 186 default: 187 #if defined(PETSC_USE_COMPLEX) 188 fftw->p_forward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_FORWARD,fftw->p_flag); 189 #else 190 fftw->p_forward = fftw_mpi_plan_dft_r2c(fftw->ndim_fftw,fftw->dim_fftw,(double *)x_array,(fftw_complex*)y_array,comm,FFTW_ESTIMATE); 191 #endif 192 break; 193 } 194 fftw->finarray = x_array; 195 fftw->foutarray = y_array; 196 /* Warning: if (fftw->p_flag!==FFTW_ESTIMATE) The data in the in/out arrays is overwritten! 197 planning should be done before x is initialized! See FFTW manual sec2.1 or sec4 */ 198 fftw_execute(fftw->p_forward); 199 } else { /* use existing plan */ 200 if (fftw->finarray != x_array || fftw->foutarray != y_array){ /* use existing plan on new arrays */ 201 fftw_execute_dft(fftw->p_forward,(fftw_complex*)x_array,(fftw_complex*)y_array); 202 } else { 203 fftw_execute(fftw->p_forward); 204 } 205 } 206 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 207 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 208 PetscFunctionReturn(0); 209 } 210 211 #undef __FUNCT__ 212 #define __FUNCT__ "MatMultTranspose_MPIFFTW" 213 PetscErrorCode MatMultTranspose_MPIFFTW(Mat A,Vec x,Vec y) 214 { 215 PetscErrorCode ierr; 216 Mat_FFT *fft = (Mat_FFT*)A->data; 217 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 218 PetscScalar *x_array,*y_array; 219 PetscInt ndim=fft->ndim,*dim=fft->dim; 220 MPI_Comm comm=((PetscObject)A)->comm; 221 222 PetscFunctionBegin; 223 ierr = VecGetArray(x,&x_array);CHKERRQ(ierr); 224 ierr = VecGetArray(y,&y_array);CHKERRQ(ierr); 225 if (!fftw->p_backward){ /* create a plan, then excute it */ 226 switch (ndim){ 227 case 1: 228 #if defined(PETSC_USE_COMPLEX) 229 fftw->p_backward = fftw_mpi_plan_dft_1d(dim[0],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 230 #else 231 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers yet"); 232 #endif 233 break; 234 case 2: 235 #if defined(PETSC_USE_COMPLEX) 236 fftw->p_backward = fftw_mpi_plan_dft_2d(dim[0],dim[1],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 237 #else 238 fftw->p_backward = fftw_mpi_plan_dft_c2r_2d(dim[0],dim[1],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 239 #endif 240 break; 241 case 3: 242 #if defined(PETSC_USE_COMPLEX) 243 fftw->p_backward = fftw_mpi_plan_dft_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 244 #else 245 fftw->p_backward = fftw_mpi_plan_dft_c2r_3d(dim[0],dim[1],dim[2],(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 246 #endif 247 break; 248 default: 249 #if defined(PETSC_USE_COMPLEX) 250 fftw->p_backward = fftw_mpi_plan_dft(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(fftw_complex*)y_array,comm,FFTW_BACKWARD,fftw->p_flag); 251 #else 252 fftw->p_backward = fftw_mpi_plan_dft_c2r(fftw->ndim_fftw,fftw->dim_fftw,(fftw_complex*)x_array,(double *)y_array,comm,FFTW_ESTIMATE); 253 #endif 254 break; 255 } 256 fftw->binarray = x_array; 257 fftw->boutarray = y_array; 258 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 259 } else { /* use existing plan */ 260 if (fftw->binarray != x_array || fftw->boutarray != y_array){ /* use existing plan on new arrays */ 261 fftw_execute_dft(fftw->p_backward,(fftw_complex*)x_array,(fftw_complex*)y_array); 262 } else { 263 fftw_execute(fftw->p_backward);CHKERRQ(ierr); 264 } 265 } 266 ierr = VecRestoreArray(y,&y_array);CHKERRQ(ierr); 267 ierr = VecRestoreArray(x,&x_array);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 #undef __FUNCT__ 272 #define __FUNCT__ "MatDestroy_FFTW" 273 PetscErrorCode MatDestroy_FFTW(Mat A) 274 { 275 Mat_FFT *fft = (Mat_FFT*)A->data; 276 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 fftw_destroy_plan(fftw->p_forward); 281 fftw_destroy_plan(fftw->p_backward); 282 ierr = PetscFree(fftw->dim_fftw);CHKERRQ(ierr); 283 ierr = PetscFree(fft->data);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 #include <../src/vec/vec/impls/mpi/pvecimpl.h> /*I "petscvec.h" I*/ 288 #undef __FUNCT__ 289 #define __FUNCT__ "VecDestroy_MPIFFTW" 290 PetscErrorCode VecDestroy_MPIFFTW(Vec v) 291 { 292 PetscErrorCode ierr; 293 PetscScalar *array; 294 295 PetscFunctionBegin; 296 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 297 fftw_free((fftw_complex*)array);CHKERRQ(ierr); 298 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 299 ierr = VecDestroy_MPI(v);CHKERRQ(ierr); 300 PetscFunctionReturn(0); 301 } 302 303 #undef __FUNCT__ 304 #define __FUNCT__ "MatGetVecsFFTW_1DC" 305 /* 306 - Get Vectors(s) compatible with matrix, i.e. with the 307 parallel layout determined by FFTW-1D 308 309 */ 310 PetscErrorCode MatGetVecsFFTW_1DC(Mat A,Vec *fin,Vec *fout,Vec *bout) 311 { 312 PetscErrorCode ierr; 313 PetscMPIInt size,rank; 314 MPI_Comm comm=((PetscObject)A)->comm; 315 Mat_FFT *fft = (Mat_FFT*)A->data; 316 // Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 317 PetscInt N=fft->N; 318 PetscInt ndim=fft->ndim,*dim=fft->dim; 319 ptrdiff_t f_alloc_local,f_local_n0,f_local_0_start; 320 ptrdiff_t f_local_n1,f_local_1_end; 321 ptrdiff_t b_alloc_local,b_local_n0,b_local_0_start; 322 ptrdiff_t b_local_n1,b_local_1_end; 323 fftw_complex *data_fin,*data_fout,*data_bout; 324 325 PetscFunctionBegin; 326 #if !defined(PETSC_USE_COMPLEX) 327 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"not support for real numbers"); 328 #endif 329 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 330 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 331 if (size == 1){ 332 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D"); 333 } 334 else { 335 if (ndim>1){ 336 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel 1D");} 337 else { 338 f_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&f_local_n0,&f_local_0_start,&f_local_n1,&f_local_1_end); 339 b_alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&b_local_n0,&b_local_0_start,&b_local_n1,&b_local_1_end); 340 if (fin) { 341 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 342 ierr = VecCreateMPIWithArray(comm,f_local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 343 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 344 } 345 if (fout) { 346 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*f_alloc_local); 347 ierr = VecCreateMPIWithArray(comm,f_local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 348 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 349 } 350 if (bout) { 351 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*b_alloc_local); 352 ierr = VecCreateMPIWithArray(comm,b_local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 353 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 354 } 355 } 356 if (fin){ 357 ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 358 } 359 if (fout){ 360 ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 361 } 362 if (bout){ 363 ierr = PetscLayoutReference(A->rmap,&(*bout)->map);CHKERRQ(ierr); 364 } 365 PetscFunctionReturn(0); 366 } 367 368 369 } 370 371 #undef __FUNCT__ 372 #define __FUNCT__ "MatGetVecsFFTW" 373 /*@ 374 MatGetVecsFFTW - Get vector(s) compatible with the matrix, i.e. with the 375 parallel layout determined by FFTW 376 377 Collective on Mat 378 379 Input Parameter: 380 . mat - the matrix 381 382 Output Parameter: 383 + fin - (optional) input vector of forward FFTW 384 - fout - (optional) output vector of forward FFTW 385 386 Level: advanced 387 388 .seealso: MatCreateFFTW() 389 @*/ 390 PetscErrorCode MatGetVecsFFTW(Mat A,Vec *x,Vec *y,Vec *z) 391 { 392 PetscErrorCode ierr; 393 PetscFunctionBegin; 394 ierr = PetscTryMethod(A,"MatGetVecsFFTW_C",(Mat,Vec*,Vec*,Vec*),(A,x,y,z));CHKERRQ(ierr); 395 PetscFunctionReturn(0); 396 } 397 398 EXTERN_C_BEGIN 399 #undef __FUNCT__ 400 #define __FUNCT__ "MatGetVecsFFTW_FFTW" 401 PetscErrorCode MatGetVecsFFTW_FFTW(Mat A,Vec *fin,Vec *fout,Vec *bout) 402 { 403 PetscErrorCode ierr; 404 PetscMPIInt size,rank; 405 MPI_Comm comm=((PetscObject)A)->comm; 406 Mat_FFT *fft = (Mat_FFT*)A->data; 407 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 408 PetscInt N=fft->N; 409 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 410 411 PetscFunctionBegin; 412 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 413 PetscValidType(A,1); 414 415 ierr = MPI_Comm_size(PETSC_COMM_WORLD, &size);CHKERRQ(ierr); 416 ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr); 417 if (size == 1){ /* sequential case */ 418 #if defined(PETSC_USE_COMPLEX) 419 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 420 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 421 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,bout);CHKERRQ(ierr);} 422 #else 423 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 424 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 425 if (bout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],bout);CHKERRQ(ierr);} 426 #endif 427 } else { 428 ptrdiff_t alloc_local,local_n0,local_0_start; 429 ptrdiff_t local_n1; 430 fftw_complex *data_fout; 431 ptrdiff_t local_1_start; 432 #if defined(PETSC_USE_COMPLEX) 433 fftw_complex *data_fin,*data_bout; 434 #else 435 double *data_finr,*data_boutr; 436 PetscInt n1,N1,vsize; 437 ptrdiff_t temp; 438 #endif 439 440 switch (ndim){ 441 case 1: 442 #if !defined(PETSC_USE_COMPLEX) 443 SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"FFTW does not allow parallel real 1D transform"); 444 #else 445 //SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Not implemented yet"); 446 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 447 if (fin) { 448 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 449 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 450 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 451 } 452 if (fout) { 453 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 454 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 455 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 456 } 457 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 458 if (bout) { 459 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 460 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 461 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 462 } 463 break; 464 #endif 465 case 2: 466 #if !defined(PETSC_USE_COMPLEX) 467 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 468 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 469 if (fin) { 470 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 471 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 472 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 473 } 474 if (bout) { 475 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 476 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 477 ierr = VecGetSize(*bout,&vsize);CHKERRQ(ierr); 478 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 479 } 480 //n1 = 2*local_n1*dim[0]; 481 if (fout) { 482 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 483 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 484 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 485 } 486 #else 487 /* Get local size */ 488 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 489 if (fin) { 490 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 491 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 492 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 493 } 494 if (fout) { 495 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 496 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 497 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 498 } 499 if (bout) { 500 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 501 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 502 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 503 } 504 #endif 505 break; 506 case 3: 507 #if !defined(PETSC_USE_COMPLEX) 508 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 509 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 510 if (fin) { 511 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 512 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 513 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 514 } 515 if (bout) { 516 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 517 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 518 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 519 } 520 //n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 521 if (fout) { 522 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 523 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 524 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 525 } 526 #else 527 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 528 if (fin) { 529 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 530 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 531 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 532 } 533 if (fout) { 534 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 535 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 536 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 537 } 538 if (bout) { 539 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 540 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 541 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 542 } 543 #endif 544 break; 545 default: 546 #if !defined(PETSC_USE_COMPLEX) 547 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 548 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 549 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 550 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 551 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 552 553 if (fin) { 554 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 555 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 556 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 557 } 558 if (bout) { 559 data_boutr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 560 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_boutr,bout);CHKERRQ(ierr); 561 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 562 } 563 //temp = fftw->partial_dim; 564 //fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 565 //n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 566 if (fout) { 567 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 568 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 569 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 570 } 571 #else 572 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 573 if (fin) { 574 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 575 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 576 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 577 } 578 if (fout) { 579 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 580 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 581 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 582 } 583 if (bout) { 584 data_bout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 585 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_bout,bout);CHKERRQ(ierr); 586 (*bout)->ops->destroy = VecDestroy_MPIFFTW; 587 } 588 #endif 589 break; 590 } 591 } 592 PetscFunctionReturn(0); 593 } 594 EXTERN_C_END 595 596 #undef __FUNCT__ 597 #define __FUNCT__ "MatGetVecs_FFTW" 598 /* 599 MatGetVecs_FFTW - Get vector(s) compatible with the matrix, i.e. with the 600 parallel layout determined by FFTW 601 602 Collective on Mat 603 604 Input Parameter: 605 . mat - the matrix 606 607 Output Parameter: 608 + fin - (optional) input vector of forward FFTW 609 - fout - (optional) output vector of forward FFTW 610 611 Level: advanced 612 613 .seealso: MatCreateFFTW() 614 */ 615 PetscErrorCode MatGetVecs_FFTW(Mat A,Vec *fin,Vec *fout) 616 { 617 PetscErrorCode ierr; 618 PetscMPIInt size,rank; 619 MPI_Comm comm=((PetscObject)A)->comm; 620 Mat_FFT *fft = (Mat_FFT*)A->data; 621 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 622 PetscInt N=fft->N; 623 PetscInt ndim=fft->ndim,*dim=fft->dim,n=fft->n; 624 625 PetscFunctionBegin; 626 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 627 PetscValidType(A,1); 628 629 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 630 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 631 if (size == 1){ /* sequential case */ 632 #if defined(PETSC_USE_COMPLEX) 633 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N,fin);CHKERRQ(ierr);} 634 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,N,fout);CHKERRQ(ierr);} 635 #else 636 if (fin) {ierr = VecCreateSeq(PETSC_COMM_SELF,N*2*(dim[ndim-1]/2+1)/dim[ndim-1],fin);CHKERRQ(ierr);} 637 if (fout){ierr = VecCreateSeq(PETSC_COMM_SELF,2*N*(dim[ndim-1]/2+1)/dim[ndim-1],fout);CHKERRQ(ierr);} 638 #endif 639 } else { /* mpi case */ 640 ptrdiff_t alloc_local,local_n0,local_0_start; 641 ptrdiff_t local_n1,local_1_end; 642 fftw_complex *data_fin,*data_fout; 643 #if !defined(PETSC_USE_COMPLEX) 644 double *data_finr ; 645 ptrdiff_t local_1_start,temp; 646 PetscInt vsize,n1,N1; 647 #endif 648 649 // PetscInt ctr; 650 // ptrdiff_t ndim1,*pdim; 651 // ndim1=(ptrdiff_t) ndim; 652 // pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 653 654 // for(ctr=0;ctr<ndim;ctr++) 655 // {k 656 // pdim[ctr] = dim[ctr]; 657 // } 658 659 660 661 switch (ndim){ 662 case 1: 663 /* Get local size */ 664 /* We need to write an error message here saying that one cannot call this routine when doing parallel 1D real FFTW */ 665 // SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"Works only for parallel Multi-dimensional FFTW, Dimension>1. Check Documentation for MatGetVecs_FFTW1D routine"); 666 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_end); 667 printf("The values of local_n0 and local_n1 are %ld and %ld\n",local_n0,local_n1); 668 if (fin) { 669 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 670 ierr = VecCreateMPIWithArray(comm,local_n0,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 671 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 672 } 673 if (fout) { 674 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 675 ierr = VecCreateMPIWithArray(comm,local_n1,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 676 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 677 } 678 break; 679 case 2: 680 #if !defined(PETSC_USE_COMPLEX) 681 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 682 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 683 if (fin) { 684 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 685 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 686 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 687 //printf("The code comes here with vector size %d\n",vsize); 688 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 689 } 690 n1 = 2*local_n1*(dim[0]); 691 //n1 = 2*local_n1*dim[1]; 692 printf("The values are %ld\n",local_n1); 693 if (fout) { 694 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 695 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 696 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 697 } 698 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 699 700 #else 701 /* Get local size */ 702 //printf("Hope this does not come here"); 703 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 704 if (fin) { 705 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 706 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 707 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 708 } 709 if (fout) { 710 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 711 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 712 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 713 } 714 //printf("Hope this does not come here"); 715 #endif 716 break; 717 case 3: 718 /* Get local size */ 719 #if !defined(PETSC_USE_COMPLEX) 720 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 721 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 722 if (fin) { 723 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 724 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n1,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 725 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 726 //printf("The code comes here with vector size %d\n",vsize); 727 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 728 } 729 printf("The value is %ld",local_n1); 730 n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 731 if (fout) { 732 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 733 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 734 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 735 } 736 printf("Vector size from fftw.c is given by %d, %d\n",n1,N1); 737 738 739 #else 740 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 741 // printf("The quantity n is %d",n); 742 if (fin) { 743 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 744 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 745 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 746 } 747 if (fout) { 748 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 749 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 750 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 751 } 752 #endif 753 break; 754 default: 755 /* Get local size */ 756 #if !defined(PETSC_USE_COMPLEX) 757 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 758 printf("The value of temp is %ld\n",temp); 759 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 760 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 761 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 762 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 763 if (fin) { 764 data_finr=(double *)fftw_malloc(sizeof(double)*alloc_local*2); 765 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,(PetscInt)n,N1,(PetscScalar*)data_finr,fin);CHKERRQ(ierr); 766 ierr = VecGetSize(*fin,&vsize);CHKERRQ(ierr); 767 //printf("The code comes here with vector size %d\n",vsize); 768 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 769 } 770 printf("The value is %ld. Global length is %d \n",local_n1,N1); 771 temp = fftw->partial_dim; 772 fftw->partial_dim = fftw->partial_dim * ((fftw->dim_fftw)[fftw->ndim_fftw-1]/2+1)*(fftw->dim_fftw)[1]/((fftw->dim_fftw)[2]*(fftw->dim_fftw)[fftw->ndim_fftw-1]); 773 774 n1 = 2*local_n1*(fftw->partial_dim); fftw->partial_dim = temp; 775 if (fout) { 776 data_fout=(fftw_complex *)fftw_malloc(sizeof(fftw_complex)*alloc_local); 777 ierr = VecCreateMPIWithArray(PETSC_COMM_WORLD,n1,N1,(PetscScalar*)data_fout,fout);CHKERRQ(ierr); 778 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 779 } 780 781 #else 782 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 783 // printf("The value of alloc local is %d from process %d\n",alloc_local,rank); 784 // printf("The value of alloc local is %d",alloc_local); 785 // pdim=(ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 786 // for(i=0;i<ndim;i++) 787 // { 788 // pdim[i]=dim[i];printf("%d",pdim[i]); 789 // } 790 // alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 791 // printf("The quantity n is %d",n); 792 if (fin) { 793 data_fin = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 794 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fin,fin);CHKERRQ(ierr); 795 (*fin)->ops->destroy = VecDestroy_MPIFFTW; 796 } 797 if (fout) { 798 data_fout = (fftw_complex*)fftw_malloc(sizeof(fftw_complex)*alloc_local); 799 ierr = VecCreateMPIWithArray(comm,n,N,(const PetscScalar*)data_fout,fout);CHKERRQ(ierr); 800 (*fout)->ops->destroy = VecDestroy_MPIFFTW; 801 } 802 #endif 803 break; 804 } 805 } 806 // if (fin){ 807 // ierr = PetscLayoutReference(A->cmap,&(*fin)->map);CHKERRQ(ierr); 808 // } 809 // if (fout){ 810 // ierr = PetscLayoutReference(A->rmap,&(*fout)->map);CHKERRQ(ierr); 811 // } 812 PetscFunctionReturn(0); 813 } 814 815 #undef __FUNCT__ 816 #define __FUNCT__ "InputTransformFFT" 817 PetscErrorCode InputTransformFFT(Mat A,Vec x,Vec y) 818 { 819 PetscErrorCode ierr; 820 PetscFunctionBegin; 821 ierr = PetscTryMethod(A,"InputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 822 PetscFunctionReturn(0); 823 } 824 825 /* 826 InputTransformFFT_FFTW - Copies the user data to the vector that goes into FFTW block. For real 827 parallel FFT, this routine also performs padding of right number of zeros at the end of the fastetst 828 changing dimension. 829 Input A, x, y 830 A - FFTW matrix 831 x - user data 832 Options Database Keys: 833 + -mat_fftw_plannerflags - set FFTW planner flags 834 835 Level: intermediate 836 837 */ 838 839 EXTERN_C_BEGIN 840 #undef __FUNCT__ 841 #define __FUNCT__ "InputTransformFFT_FTTW" 842 PetscErrorCode InputTransformFFT_FFTW(Mat A,Vec x,Vec y) 843 { 844 PetscErrorCode ierr; 845 MPI_Comm comm=((PetscObject)A)->comm; 846 Mat_FFT *fft = (Mat_FFT*)A->data; 847 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 848 PetscInt N=fft->N; 849 PetscInt ndim=fft->ndim,*dim=fft->dim;//n=fft->n; 850 PetscInt low; 851 PetscInt rank,size; 852 ptrdiff_t alloc_local,local_n0,local_0_start; 853 ptrdiff_t local_n1,local_1_start; 854 VecScatter vecscat; 855 IS list1,list2; 856 #if !defined(PETSC_USE_COMPLEX) 857 PetscInt i,j,k,partial_dim; 858 PetscInt *indx1, *indx2, tempindx, tempindx1; 859 PetscInt N1, n1 ,NM; 860 ptrdiff_t temp; 861 #endif 862 863 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 864 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 865 ierr = VecGetOwnershipRange(y,&low,PETSC_NULL); 866 867 if (size==1) 868 { 869 /* switch (ndim){ 870 case 1: 871 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 872 for (i=0;i<dim[0];i++) 873 { 874 indx1[i] = i; 875 } 876 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 877 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 878 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 879 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 880 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 881 ierr = ISDestroy(&list1);CHKERRQ(ierr); 882 ierr = PetscFree(indx1);CHKERRQ(ierr); 883 break; 884 885 case 2: 886 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 887 for (i=0;i<dim[0];i++){ 888 for (j=0;j<dim[1];j++){ 889 indx1[i*dim[1]+j] = i*dim[1] + j; 890 } 891 } 892 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 893 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 894 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 895 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 896 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 897 ierr = ISDestroy(&list1);CHKERRQ(ierr); 898 ierr = PetscFree(indx1);CHKERRQ(ierr); 899 break; 900 case 3: 901 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 902 for (i=0;i<dim[0];i++){ 903 for (j=0;j<dim[1];j++){ 904 for (k=0;k<dim[2];k++){ 905 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 906 } 907 } 908 } 909 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 910 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 911 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 912 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 913 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 914 ierr = ISDestroy(&list1);CHKERRQ(ierr); 915 ierr = PetscFree(indx1);CHKERRQ(ierr); 916 break; 917 default: 918 */ 919 ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&list1);CHKERRQ(ierr); 920 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 921 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 922 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 923 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 924 ierr = ISDestroy(&list1);CHKERRQ(ierr); 925 // break; 926 // } 927 } 928 929 else{ 930 931 switch (ndim){ 932 case 1: 933 #if defined(PETSC_USE_COMPLEX) 934 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 935 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,local_0_start,1,&list1); 936 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0,low,1,&list2); 937 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 938 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 939 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 940 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 941 ierr = ISDestroy(&list1);CHKERRQ(ierr); 942 ierr = ISDestroy(&list2);CHKERRQ(ierr); 943 #else 944 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 945 #endif 946 break; 947 case 2: 948 #if defined(PETSC_USE_COMPLEX) 949 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 950 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 951 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 952 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 953 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 954 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 955 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 956 ierr = ISDestroy(&list1);CHKERRQ(ierr); 957 ierr = ISDestroy(&list2);CHKERRQ(ierr); 958 #else 959 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 960 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 961 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 962 //ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 963 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 964 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 965 966 if (dim[1]%2==0) 967 NM = dim[1]+2; 968 else 969 NM = dim[1]+1; 970 971 for (i=0;i<local_n0;i++){ 972 for (j=0;j<dim[1];j++){ 973 tempindx = i*dim[1] + j; 974 tempindx1 = i*NM + j; 975 indx1[tempindx]=local_0_start*dim[1]+tempindx; 976 indx2[tempindx]=low+tempindx1; 977 } 978 } 979 980 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 981 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 982 983 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 984 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 985 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 986 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 987 ierr = ISDestroy(&list1);CHKERRQ(ierr); 988 ierr = ISDestroy(&list2);CHKERRQ(ierr); 989 ierr = PetscFree(indx1);CHKERRQ(ierr); 990 ierr = PetscFree(indx2);CHKERRQ(ierr); 991 #endif 992 break; 993 994 case 3: 995 #if defined(PETSC_USE_COMPLEX) 996 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 997 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 998 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 999 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1000 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1001 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1002 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1003 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1004 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1005 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1006 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1007 #else 1008 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1009 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1010 1011 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1012 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1013 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1014 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1015 1016 if (dim[2]%2==0) 1017 NM = dim[2]+2; 1018 else 1019 NM = dim[2]+1; 1020 1021 for (i=0;i<local_n0;i++){ 1022 for (j=0;j<dim[1];j++){ 1023 for (k=0;k<dim[2];k++){ 1024 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1025 tempindx1 = i*dim[1]*NM + j*NM + k; 1026 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1027 indx2[tempindx]=low+tempindx1; 1028 } 1029 } 1030 } 1031 1032 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1033 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1034 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1035 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1036 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1037 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1038 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1039 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1040 ierr = PetscFree(indx1);CHKERRQ(ierr); 1041 ierr = PetscFree(indx2);CHKERRQ(ierr); 1042 #endif 1043 break; 1044 1045 default: 1046 #if defined(PETSC_USE_COMPLEX) 1047 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1048 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1049 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1050 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1051 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1052 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1053 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1054 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1055 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1056 #else 1057 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1058 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1059 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1060 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1061 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1062 1063 partial_dim = fftw->partial_dim; 1064 1065 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1066 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1067 1068 if (dim[ndim-1]%2==0) 1069 NM = 2; 1070 else 1071 NM = 1; 1072 1073 j = low; 1074 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1075 { 1076 indx1[i] = local_0_start*partial_dim + i; 1077 indx2[i] = j; 1078 if (k%dim[ndim-1]==0) 1079 { j+=NM;} 1080 j++; 1081 } 1082 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1083 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1084 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1085 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1086 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1087 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1088 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1089 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1090 ierr = PetscFree(indx1);CHKERRQ(ierr); 1091 ierr = PetscFree(indx2);CHKERRQ(ierr); 1092 #endif 1093 break; 1094 } 1095 } 1096 1097 return 0; 1098 } 1099 EXTERN_C_END 1100 1101 #undef __FUNCT__ 1102 #define __FUNCT__ "OutputTransformFFT" 1103 PetscErrorCode OutputTransformFFT(Mat A,Vec x,Vec y) 1104 { 1105 PetscErrorCode ierr; 1106 PetscFunctionBegin; 1107 ierr = PetscTryMethod(A,"OutputTransformFFT_C",(Mat,Vec,Vec),(A,x,y));CHKERRQ(ierr); 1108 PetscFunctionReturn(0); 1109 } 1110 1111 /* 1112 OutputTransformFFT_FFTW - Copies the FFTW output to the PETSc vector that user can use 1113 Input A, x, y 1114 A - FFTW matrix 1115 x - FFTW vector 1116 y - PETSc vector that user can read 1117 Options Database Keys: 1118 + -mat_fftw_plannerflags - set FFTW planner flags 1119 1120 Level: intermediate 1121 1122 */ 1123 1124 EXTERN_C_BEGIN 1125 #undef __FUNCT__ 1126 #define __FUNCT__ "OutputTransformFFT_FTTW" 1127 PetscErrorCode OutputTransformFFT_FFTW(Mat A,Vec x,Vec y) 1128 { 1129 PetscErrorCode ierr; 1130 MPI_Comm comm=((PetscObject)A)->comm; 1131 Mat_FFT *fft = (Mat_FFT*)A->data; 1132 Mat_FFTW *fftw = (Mat_FFTW*)fft->data; 1133 PetscInt N=fft->N; 1134 PetscInt ndim=fft->ndim,*dim=fft->dim; 1135 PetscInt low; 1136 PetscInt rank,size; 1137 ptrdiff_t alloc_local,local_n0,local_0_start; 1138 ptrdiff_t local_n1,local_1_start; 1139 VecScatter vecscat; 1140 IS list1,list2; 1141 #if !defined(PETSC_USE_COMPLEX) 1142 PetscInt i,j,k,partial_dim; 1143 PetscInt *indx1, *indx2, tempindx, tempindx1; 1144 PetscInt N1, n1 ,NM; 1145 ptrdiff_t temp; 1146 #endif 1147 1148 1149 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1150 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1151 ierr = VecGetOwnershipRange(x,&low,PETSC_NULL);CHKERRQ(ierr); 1152 1153 if (size==1){ 1154 /* 1155 switch (ndim){ 1156 case 1: 1157 ierr = PetscMalloc(sizeof(PetscInt)*dim[0],&indx1);CHKERRQ(ierr); 1158 for (i=0;i<dim[0];i++) 1159 { 1160 indx1[i] = i; 1161 } 1162 ierr = ISCreateGeneral(comm,dim[0],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1163 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1164 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1165 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1166 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1167 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1168 ierr = PetscFree(indx1);CHKERRQ(ierr); 1169 break; 1170 1171 case 2: 1172 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1],&indx1);CHKERRQ(ierr); 1173 for (i=0;i<dim[0];i++){ 1174 for (j=0;j<dim[1];j++){ 1175 indx1[i*dim[1]+j] = i*dim[1] + j; 1176 } 1177 } 1178 ierr = ISCreateGeneral(comm,dim[0]*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1179 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1180 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1181 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1182 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1183 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1184 ierr = PetscFree(indx1);CHKERRQ(ierr); 1185 break; 1186 case 3: 1187 ierr = PetscMalloc(sizeof(PetscInt)*dim[0]*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1188 for (i=0;i<dim[0];i++){ 1189 for (j=0;j<dim[1];j++){ 1190 for (k=0;k<dim[2];k++){ 1191 indx1[i*dim[1]*dim[2]+j*dim[2]+k] = i*dim[1]*dim[2] + j*dim[2] + k; 1192 } 1193 } 1194 } 1195 ierr = ISCreateGeneral(comm,dim[0]*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1196 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1197 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1198 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1199 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1200 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1201 ierr = PetscFree(indx1);CHKERRQ(ierr); 1202 break; 1203 default: 1204 */ 1205 ierr = ISCreateStride(comm,N,0,1,&list1);CHKERRQ(ierr); 1206 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_SELF); 1207 ierr = VecScatterCreate(x,list1,y,list1,&vecscat);CHKERRQ(ierr); 1208 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1209 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1210 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1211 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1212 // break; 1213 // } 1214 } 1215 else{ 1216 1217 switch (ndim){ 1218 case 1: 1219 #if defined(PETSC_USE_COMPLEX) 1220 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_BACKWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1221 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,local_1_start,1,&list1); 1222 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n1,low,1,&list2); 1223 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1224 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1225 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1226 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1227 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1228 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1229 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1230 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1231 #else 1232 SETERRQ(comm,PETSC_ERR_SUP,"No support for real parallel 1D FFT"); 1233 #endif 1234 break; 1235 case 2: 1236 #if defined(PETSC_USE_COMPLEX) 1237 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1238 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],local_0_start*dim[1],1,&list1); 1239 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1],low,1,&list2); 1240 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1241 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1242 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1243 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1244 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1245 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1246 #else 1247 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1248 N1 = 2*dim[0]*(dim[1]/2+1); n1 = 2*local_n0*(dim[1]/2+1); 1249 1250 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx1);CHKERRQ(ierr); 1251 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1],&indx2);CHKERRQ(ierr); 1252 1253 if (dim[1]%2==0) 1254 NM = dim[1]+2; 1255 else 1256 NM = dim[1]+1; 1257 1258 for (i=0;i<local_n0;i++){ 1259 for (j=0;j<dim[1];j++){ 1260 tempindx = i*dim[1] + j; 1261 tempindx1 = i*NM + j; 1262 indx1[tempindx]=local_0_start*dim[1]+tempindx; 1263 indx2[tempindx]=low+tempindx1; 1264 } 1265 } 1266 1267 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1268 ierr = ISCreateGeneral(comm,local_n0*dim[1],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1269 1270 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1271 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1272 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1273 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1274 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1275 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1276 ierr = PetscFree(indx1);CHKERRQ(ierr); 1277 ierr = PetscFree(indx2);CHKERRQ(ierr); 1278 #endif 1279 break; 1280 case 3: 1281 #if defined(PETSC_USE_COMPLEX) 1282 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1283 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],local_0_start*dim[1]*dim[2],1,&list1); 1284 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*dim[1]*dim[2],low,1,&list2); 1285 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1286 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1287 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1288 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1289 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1290 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1291 #else 1292 1293 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1294 N1 = 2*dim[0]*dim[1]*(dim[2]/2+1); n1 = 2*local_n0*dim[1]*(dim[2]/2+1); 1295 1296 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx1);CHKERRQ(ierr); 1297 // ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*N1,&indx2);CHKERRQ(ierr); 1298 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx1);CHKERRQ(ierr); 1299 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*dim[1]*dim[2],&indx2);CHKERRQ(ierr); 1300 1301 if (dim[2]%2==0) 1302 NM = dim[2]+2; 1303 else 1304 NM = dim[2]+1; 1305 1306 for (i=0;i<local_n0;i++){ 1307 for (j=0;j<dim[1];j++){ 1308 for (k=0;k<dim[2];k++){ 1309 tempindx = i*dim[1]*dim[2] + j*dim[2] + k; 1310 tempindx1 = i*dim[1]*NM + j*NM + k; 1311 indx1[tempindx]=local_0_start*dim[1]*dim[2]+tempindx; 1312 indx2[tempindx]=low+tempindx1; 1313 } 1314 // printf("Val tempindx1 = %d\n",tempindx1); 1315 // printf("index1 %d from proc %d is \n",indx1[tempindx],rank); 1316 // printf("index2 %d from proc %d is \n",indx2[tempindx],rank); 1317 // printf("-------------------------\n",indx2[tempindx],rank); 1318 } 1319 } 1320 1321 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1322 ierr = ISCreateGeneral(comm,local_n0*dim[1]*dim[2],indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1323 1324 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1325 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1326 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1327 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1328 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1329 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1330 ierr = PetscFree(indx1);CHKERRQ(ierr); 1331 ierr = PetscFree(indx2);CHKERRQ(ierr); 1332 #endif 1333 break; 1334 default: 1335 #if defined(PETSC_USE_COMPLEX) 1336 alloc_local = fftw_mpi_local_size(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start); 1337 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),local_0_start*(fftw->partial_dim),1,&list1); 1338 ierr = ISCreateStride(PETSC_COMM_WORLD,local_n0*(fftw->partial_dim),low,1,&list2); 1339 //ierr = ISView(list1,PETSC_VIEWER_STDOUT_WORLD); 1340 //ierr = ISView(list2,PETSC_VIEWER_STDOUT_WORLD); 1341 ierr = VecScatterCreate(x,list1,y,list2,&vecscat);CHKERRQ(ierr); 1342 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1343 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1344 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1345 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1346 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1347 #else 1348 temp = (fftw->dim_fftw)[fftw->ndim_fftw-1]; 1349 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp/2 + 1; 1350 alloc_local = fftw_mpi_local_size_transposed(fftw->ndim_fftw,fftw->dim_fftw,comm,&local_n0,&local_0_start,&local_n1,&local_1_start); 1351 N1 = 2*N*(PetscInt)((fftw->dim_fftw)[fftw->ndim_fftw-1])/((PetscInt) temp); 1352 (fftw->dim_fftw)[fftw->ndim_fftw-1] = temp; 1353 1354 partial_dim = fftw->partial_dim; 1355 1356 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx1);CHKERRQ(ierr); 1357 ierr = PetscMalloc(sizeof(PetscInt)*((PetscInt)local_n0)*partial_dim,&indx2);CHKERRQ(ierr); 1358 1359 if (dim[ndim-1]%2==0) 1360 NM = 2; 1361 else 1362 NM = 1; 1363 1364 j = low; 1365 for (i=0,k=1; i<((PetscInt)local_n0)*partial_dim;i++,k++) 1366 { 1367 indx1[i] = local_0_start*partial_dim + i; 1368 indx2[i] = j; 1369 if (k%dim[ndim-1]==0) 1370 { j+=NM;} 1371 j++; 1372 } 1373 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx1,PETSC_COPY_VALUES,&list1);CHKERRQ(ierr); 1374 ierr = ISCreateGeneral(comm,local_n0*partial_dim,indx2,PETSC_COPY_VALUES,&list2);CHKERRQ(ierr); 1375 1376 ierr = VecScatterCreate(x,list2,y,list1,&vecscat);CHKERRQ(ierr); 1377 ierr = VecScatterBegin(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1378 ierr = VecScatterEnd(vecscat,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1379 ierr = VecScatterDestroy(&vecscat);CHKERRQ(ierr); 1380 ierr = ISDestroy(&list1);CHKERRQ(ierr); 1381 ierr = ISDestroy(&list2);CHKERRQ(ierr); 1382 ierr = PetscFree(indx1);CHKERRQ(ierr); 1383 ierr = PetscFree(indx2);CHKERRQ(ierr); 1384 #endif 1385 break; 1386 } 1387 } 1388 return 0; 1389 } 1390 EXTERN_C_END 1391 1392 EXTERN_C_BEGIN 1393 #undef __FUNCT__ 1394 #define __FUNCT__ "MatCreate_FFTW" 1395 /* 1396 MatCreate_FFTW - Creates a matrix object that provides FFT 1397 via the external package FFTW 1398 Options Database Keys: 1399 + -mat_fftw_plannerflags - set FFTW planner flags 1400 1401 Level: intermediate 1402 1403 */ 1404 1405 PetscErrorCode MatCreate_FFTW(Mat A) 1406 { 1407 PetscErrorCode ierr; 1408 MPI_Comm comm=((PetscObject)A)->comm; 1409 Mat_FFT *fft=(Mat_FFT*)A->data; 1410 Mat_FFTW *fftw; 1411 PetscInt n=fft->n,N=fft->N,ndim=fft->ndim,*dim = fft->dim; 1412 const char *p_flags[]={"FFTW_ESTIMATE","FFTW_MEASURE","FFTW_PATIENT","FFTW_EXHAUSTIVE"}; 1413 PetscBool flg; 1414 PetscInt p_flag,partial_dim=1,ctr; 1415 PetscMPIInt size,rank; 1416 ptrdiff_t *pdim; 1417 ptrdiff_t local_n1,local_1_start; 1418 #if !defined(PETSC_USE_COMPLEX) 1419 ptrdiff_t temp; 1420 PetscInt N1; 1421 #else 1422 PetscInt n1; 1423 #endif 1424 1425 1426 PetscFunctionBegin; 1427 ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr); 1428 ierr = MPI_Comm_rank(comm, &rank);CHKERRQ(ierr); 1429 ierr = MPI_Barrier(PETSC_COMM_WORLD); 1430 1431 pdim = (ptrdiff_t *)calloc(ndim,sizeof(ptrdiff_t)); 1432 pdim[0] = dim[0]; 1433 for (ctr=1;ctr<ndim;ctr++) 1434 { 1435 partial_dim *= dim[ctr]; 1436 pdim[ctr] = dim[ctr]; 1437 } 1438 1439 if (size == 1) { 1440 #if defined(PETSC_USE_COMPLEX) 1441 ierr = MatSetSizes(A,N,N,N,N);CHKERRQ(ierr); 1442 n = N; 1443 #else 1444 PetscInt tot_dim = N*2*(dim[ndim-1]/2+1)/dim[ndim-1]; 1445 ierr = MatSetSizes(A,tot_dim,tot_dim,tot_dim,tot_dim);CHKERRQ(ierr); 1446 n = tot_dim; 1447 #endif 1448 1449 } else { 1450 ptrdiff_t alloc_local,local_n0,local_0_start;//local_1_end; 1451 switch (ndim){ 1452 case 1: 1453 #if !defined(PETSC_USE_COMPLEX) 1454 SETERRQ(comm,PETSC_ERR_SUP,"FFTW does not support parallel 1D real transform"); 1455 #else 1456 alloc_local = fftw_mpi_local_size_1d(dim[0],comm,FFTW_FORWARD,FFTW_ESTIMATE,&local_n0,&local_0_start,&local_n1,&local_1_start); 1457 n = (PetscInt)local_n0; 1458 n1 = (PetscInt) local_n1; 1459 ierr = MatSetSizes(A,n1,n,N,N);CHKERRQ(ierr); 1460 #endif 1461 break; 1462 case 2: 1463 #if defined(PETSC_USE_COMPLEX) 1464 alloc_local = fftw_mpi_local_size_2d(dim[0],dim[1],comm,&local_n0,&local_0_start); 1465 /* 1466 PetscMPIInt rank; 1467 PetscSynchronizedPrintf(comm,"[%d] MatCreateSeqFFTW: local_n0, local_0_start %d %d, N %d,dim %d, %d\n",rank,(PetscInt)local_n0*dim[1],(PetscInt)local_0_start,m,dim[0],dim[1]); 1468 PetscSynchronizedFlush(comm); 1469 */ 1470 n = (PetscInt)local_n0*dim[1]; 1471 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1472 #else 1473 alloc_local = fftw_mpi_local_size_2d_transposed(dim[0],dim[1]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1474 n = 2*(PetscInt)local_n0*(dim[1]/2+1); 1475 // n1 = 2*(PetscInt)local_n1*(dim[0]); 1476 // ierr = MatSetSizes(A,n1,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1477 ierr = MatSetSizes(A,n,n,2*dim[0]*(dim[1]/2+1),2*dim[0]*(dim[1]/2+1)); 1478 #endif 1479 break; 1480 case 3: 1481 #if defined(PETSC_USE_COMPLEX) 1482 alloc_local = fftw_mpi_local_size_3d(dim[0],dim[1],dim[2],comm,&local_n0,&local_0_start); 1483 n = (PetscInt)local_n0*dim[1]*dim[2]; 1484 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1485 #else 1486 alloc_local = fftw_mpi_local_size_3d_transposed(dim[0],dim[1],dim[2]/2+1,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1487 n = 2*(PetscInt)local_n0*dim[1]*(dim[2]/2+1); 1488 // n1 = 2*local_n1*dim[0]*(dim[2]/2+1); 1489 // ierr = MatSetSizes(A,n1,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1490 ierr = MatSetSizes(A,n,n,2*dim[0]*dim[1]*(dim[2]/2+1),2*dim[0]*dim[1]*(dim[2]/2+1)); 1491 #endif 1492 break; 1493 default: 1494 #if defined(PETSC_USE_COMPLEX) 1495 alloc_local = fftw_mpi_local_size(ndim,pdim,comm,&local_n0,&local_0_start); 1496 n = (PetscInt)local_n0*partial_dim; 1497 ierr = MatSetSizes(A,n,n,N,N);CHKERRQ(ierr); 1498 #else 1499 temp = pdim[ndim-1]; 1500 pdim[ndim-1] = temp/2 + 1; 1501 alloc_local = fftw_mpi_local_size_transposed(ndim,pdim,PETSC_COMM_WORLD,&local_n0,&local_0_start,&local_n1,&local_1_start); 1502 n = 2*(PetscInt)local_n0*partial_dim*pdim[ndim-1]/temp; 1503 N1 = 2*N*(PetscInt)pdim[ndim-1]/((PetscInt) temp); 1504 pdim[ndim-1] = temp; 1505 // temp = partial_dim*(dim[ndim-1]/2+1)*dim[0]/(dim[1]*dim[ndim-1]); 1506 // n1 = 2*local_n1*temp; 1507 // ierr = MatSetSizes(A,n1,n,N1,N1);CHKERRQ(ierr); 1508 ierr = MatSetSizes(A,n,n,N1,N1);CHKERRQ(ierr); 1509 #endif 1510 break; 1511 } 1512 } 1513 ierr = PetscObjectChangeTypeName((PetscObject)A,MATFFTW);CHKERRQ(ierr); 1514 ierr = PetscNewLog(A,Mat_FFTW,&fftw);CHKERRQ(ierr); 1515 fft->data = (void*)fftw; 1516 1517 fft->n = n; 1518 fftw->ndim_fftw = (ptrdiff_t)ndim; // This is dimension of fft 1519 fftw->partial_dim = partial_dim; 1520 ierr = PetscMalloc(ndim*sizeof(ptrdiff_t), (ptrdiff_t *)&(fftw->dim_fftw));CHKERRQ(ierr); 1521 for(ctr=0;ctr<ndim;ctr++) (fftw->dim_fftw)[ctr]=dim[ctr]; 1522 1523 fftw->p_forward = 0; 1524 fftw->p_backward = 0; 1525 fftw->p_flag = FFTW_ESTIMATE; 1526 1527 if (size == 1){ 1528 A->ops->mult = MatMult_SeqFFTW; 1529 A->ops->multtranspose = MatMultTranspose_SeqFFTW; 1530 } else { 1531 A->ops->mult = MatMult_MPIFFTW; 1532 A->ops->multtranspose = MatMultTranspose_MPIFFTW; 1533 } 1534 fft->matdestroy = MatDestroy_FFTW; 1535 // if(ndim=1 && size>1) and also if it is complex then getvecs should be attached to MatGetVecs_FFTW1D 1536 //A->ops->getvecs = MatGetVecs_FFTW; 1537 A->assembled = PETSC_TRUE; 1538 PetscObjectComposeFunctionDynamic((PetscObject)A,"MatGetVecsFFTW_C","MatGetVecsFFTW_FFTW",MatGetVecsFFTW_FFTW); 1539 PetscObjectComposeFunctionDynamic((PetscObject)A,"InputTransformFFT_C","InputTransformFFT_FFTW",InputTransformFFT_FFTW); 1540 PetscObjectComposeFunctionDynamic((PetscObject)A,"OutputTransformFFT_C","OutputTransformFFT_FFTW",OutputTransformFFT_FFTW); 1541 1542 /* get runtime options */ 1543 ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"FFTW Options","Mat");CHKERRQ(ierr); 1544 ierr = PetscOptionsEList("-mat_fftw_plannerflags","Planner Flags","None",p_flags,4,p_flags[0],&p_flag,&flg);CHKERRQ(ierr); 1545 if (flg) {fftw->p_flag = (unsigned)p_flag;} 1546 PetscOptionsEnd(); 1547 PetscFunctionReturn(0); 1548 } 1549 EXTERN_C_END 1550 1551 1552 1553 1554