xref: /petsc/src/sys/fileio/fretrieve.c (revision d71ae5a4db6382e7f06317b8d368875286fe9008)
1 
2 /*
3       Code for opening and closing files.
4 */
5 #include <petscsys.h>
6 #if defined(PETSC_HAVE_PWD_H)
7   #include <pwd.h>
8 #endif
9 #include <ctype.h>
10 #include <sys/stat.h>
11 #if defined(PETSC_HAVE_UNISTD_H)
12   #include <unistd.h>
13 #endif
14 #if defined(PETSC_HAVE_SYS_UTSNAME_H)
15   #include <sys/utsname.h>
16 #endif
17 #include <fcntl.h>
18 #include <time.h>
19 #if defined(PETSC_HAVE_SYS_SYSTEMINFO_H)
20   #include <sys/systeminfo.h>
21 #endif
22 
23 /*
24    Private routine to delete tmp/shared storage
25 
26    This is called by MPI, not by users.
27 
28    Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
29 
30 */
31 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelTmpShared(MPI_Comm comm, PetscMPIInt keyval, void *count_val, void *extra_state)
32 {
33   PetscFunctionBegin;
34   PetscCallMPI(PetscInfo(NULL, "Deleting tmp/shared data in an MPI_Comm %ld\n", (long)comm));
35   PetscCallMPI(PetscFree(count_val));
36   PetscFunctionReturn(MPI_SUCCESS);
37 }
38 
39 /*@C
40    PetscGetTmp - Gets the name of the tmp directory
41 
42    Collective
43 
44    Input Parameters:
45 +  comm - MPI_Communicator that may share /tmp
46 -  len - length of string to hold name
47 
48    Output Parameter:
49 .  dir - directory name
50 
51    Options Database Keys:
52 +    -shared_tmp  - indicates the directory is shared among the MPI ranks
53 .    -not_shared_tmp - indicates the directory is not shared among the MPI ranks
54 -    -tmp tmpdir - name of the directory you wish to use as /tmp
55 
56    Environmental Variables:
57 +     `PETSC_SHARED_TMP` - indicates the directory is shared among the MPI ranks
58 .     `PETSC_NOT_SHARED_TMP` - indicates the directory is not shared among the MPI ranks
59 -     `PETSC_TMP` - name of the directory you wish to use as /tmp
60 
61    Level: developer
62 
63 .seealso: `PetscSharedTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
64 @*/
65 PetscErrorCode PetscGetTmp(MPI_Comm comm, char dir[], size_t len)
66 {
67   PetscBool flg;
68 
69   PetscFunctionBegin;
70   PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", dir, len, &flg));
71   if (!flg) PetscCall(PetscStrncpy(dir, "/tmp", len));
72   PetscFunctionReturn(0);
73 }
74 
75 /*@C
76    PetscSharedTmp - Determines if all processors in a communicator share a
77          /tmp or have different ones.
78 
79    Collective
80 
81    Input Parameter:
82 .  comm - MPI_Communicator that may share /tmp
83 
84    Output Parameter:
85 .  shared - `PETSC_TRUE` or `PETSC_FALSE`
86 
87    Options Database Keys:
88 +    -shared_tmp  - indicates the directory is shared among the MPI ranks
89 .    -not_shared_tmp - indicates the directory is not shared among the MPI ranks
90 -    -tmp tmpdir - name of the directory you wish to use as /tmp
91 
92    Environmental Variables:
93 +     `PETSC_SHARED_TMP`  - indicates the directory is shared among the MPI ranks
94 .     `PETSC_NOT_SHARED_TMP` - indicates the directory is not shared among the MPI ranks
95 -     `PETSC_TMP` - name of the directory you wish to use as /tmp
96 
97    Level: developer
98 
99    Notes:
100    Stores the status as a MPI attribute so it does not have
101     to be redetermined each time.
102 
103       Assumes that all processors in a communicator either
104        1) have a common /tmp or
105        2) each has a separate /tmp
106       eventually we can write a fancier one that determines which processors
107       share a common /tmp.
108 
109    This will be very slow on runs with a large number of processors since
110    it requires O(p*p) file opens.
111 
112    If the environmental variable PETSC_TMP is set it will use this directory
113   as the "/tmp" directory.
114 
115 .seealso: `PetscGetTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
116 @*/
117 PetscErrorCode PetscSharedTmp(MPI_Comm comm, PetscBool *shared)
118 {
119   PetscMPIInt        size, rank, *tagvalp, sum, cnt, i;
120   PetscBool          flg, iflg;
121   FILE              *fd;
122   static PetscMPIInt Petsc_Tmp_keyval = MPI_KEYVAL_INVALID;
123   int                err;
124 
125   PetscFunctionBegin;
126   PetscCallMPI(MPI_Comm_size(comm, &size));
127   if (size == 1) {
128     *shared = PETSC_TRUE;
129     PetscFunctionReturn(0);
130   }
131 
132   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_TMP", NULL, 0, &flg));
133   if (flg) {
134     *shared = PETSC_TRUE;
135     PetscFunctionReturn(0);
136   }
137 
138   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_TMP", NULL, 0, &flg));
139   if (flg) {
140     *shared = PETSC_FALSE;
141     PetscFunctionReturn(0);
142   }
143 
144   if (Petsc_Tmp_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_Tmp_keyval, NULL));
145 
146   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Tmp_keyval, (void **)&tagvalp, (int *)&iflg));
147   if (!iflg) {
148     char filename[PETSC_MAX_PATH_LEN], tmpname[PETSC_MAX_PATH_LEN];
149 
150     /* This communicator does not yet have a shared tmp attribute */
151     PetscCall(PetscMalloc1(1, &tagvalp));
152     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Tmp_keyval, tagvalp));
153 
154     PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", tmpname, 238, &iflg));
155     if (!iflg) {
156       PetscCall(PetscStrcpy(filename, "/tmp"));
157     } else {
158       PetscCall(PetscStrcpy(filename, tmpname));
159     }
160 
161     PetscCall(PetscStrcat(filename, "/petsctestshared"));
162     PetscCallMPI(MPI_Comm_rank(comm, &rank));
163 
164     /* each processor creates a /tmp file and all the later ones check */
165     /* this makes sure no subset of processors is shared */
166     *shared = PETSC_FALSE;
167     for (i = 0; i < size - 1; i++) {
168       if (rank == i) {
169         fd = fopen(filename, "w");
170         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
171         err = fclose(fd);
172         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
173       }
174       PetscCallMPI(MPI_Barrier(comm));
175       if (rank >= i) {
176         fd = fopen(filename, "r");
177         if (fd) cnt = 1;
178         else cnt = 0;
179         if (fd) {
180           err = fclose(fd);
181           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
182         }
183       } else cnt = 0;
184 
185       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
186       if (rank == i) unlink(filename);
187 
188       if (sum == size) {
189         *shared = PETSC_TRUE;
190         break;
191       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share /tmp ");
192     }
193     *tagvalp = (int)*shared;
194     PetscCall(PetscInfo(NULL, "processors %s %s\n", (*shared) ? "share" : "do NOT share", (iflg ? tmpname : "/tmp")));
195   } else *shared = (PetscBool)*tagvalp;
196   PetscFunctionReturn(0);
197 }
198 
199 /*@C
200   PetscSharedWorkingDirectory - Determines if all processors in a communicator share a working directory or have different ones.
201 
202   Collective
203 
204   Input Parameter:
205 . comm - MPI_Communicator that may share working directory
206 
207   Output Parameter:
208 . shared - `PETSC_TRUE` or `PETSC_FALSE`
209 
210   Options Database Keys:
211 + -shared_working_directory - indicates the directory is shared among the MPI ranks
212 - -not_shared_working_directory - indicates the directory is shared among the MPI ranks
213 
214   Environmental Variables:
215 + `PETSC_SHARED_WORKING_DIRECTORY` - indicates the directory is shared among the MPI ranks
216 - `PETSC_NOT_SHARED_WORKING_DIRECTORY` - indicates the directory is shared among the MPI ranks
217 
218   Level: developer
219 
220   Notes:
221   Stores the status as a MPI attribute so it does not have to be redetermined each time.
222 
223   Assumes that all processors in a communicator either
224 $   1) have a common working directory or
225 $   2) each has a separate working directory
226   eventually we can write a fancier one that determines which processors share a common working directory.
227 
228   This will be very slow on runs with a large number of processors since it requires O(p*p) file opens.
229 
230 .seealso: `PetscGetTmp()`, `PetscSharedTmp()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
231 @*/
232 PetscErrorCode PetscSharedWorkingDirectory(MPI_Comm comm, PetscBool *shared)
233 {
234   PetscMPIInt        size, rank, *tagvalp, sum, cnt, i;
235   PetscBool          flg, iflg;
236   FILE              *fd;
237   static PetscMPIInt Petsc_WD_keyval = MPI_KEYVAL_INVALID;
238   int                err;
239 
240   PetscFunctionBegin;
241   PetscCallMPI(MPI_Comm_size(comm, &size));
242   if (size == 1) {
243     *shared = PETSC_TRUE;
244     PetscFunctionReturn(0);
245   }
246 
247   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
248   if (flg) {
249     *shared = PETSC_TRUE;
250     PetscFunctionReturn(0);
251   }
252 
253   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
254   if (flg) {
255     *shared = PETSC_FALSE;
256     PetscFunctionReturn(0);
257   }
258 
259   if (Petsc_WD_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_WD_keyval, NULL));
260 
261   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_WD_keyval, (void **)&tagvalp, (int *)&iflg));
262   if (!iflg) {
263     char filename[PETSC_MAX_PATH_LEN];
264 
265     /* This communicator does not yet have a shared  attribute */
266     PetscCall(PetscMalloc1(1, &tagvalp));
267     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_WD_keyval, tagvalp));
268 
269     PetscCall(PetscGetWorkingDirectory(filename, 240));
270     PetscCall(PetscStrcat(filename, "/petsctestshared"));
271     PetscCallMPI(MPI_Comm_rank(comm, &rank));
272 
273     /* each processor creates a  file and all the later ones check */
274     /* this makes sure no subset of processors is shared */
275     *shared = PETSC_FALSE;
276     for (i = 0; i < size - 1; i++) {
277       if (rank == i) {
278         fd = fopen(filename, "w");
279         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
280         err = fclose(fd);
281         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
282       }
283       PetscCallMPI(MPI_Barrier(comm));
284       if (rank >= i) {
285         fd = fopen(filename, "r");
286         if (fd) cnt = 1;
287         else cnt = 0;
288         if (fd) {
289           err = fclose(fd);
290           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
291         }
292       } else cnt = 0;
293 
294       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
295       if (rank == i) unlink(filename);
296 
297       if (sum == size) {
298         *shared = PETSC_TRUE;
299         break;
300       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share working directory");
301     }
302     *tagvalp = (int)*shared;
303   } else *shared = (PetscBool)*tagvalp;
304   PetscCall(PetscInfo(NULL, "processors %s working directory\n", (*shared) ? "shared" : "do NOT share"));
305   PetscFunctionReturn(0);
306 }
307 
308 /*@C
309     PetscFileRetrieve - Obtains a file from a URL or compressed
310         and copies into local disk space as uncompressed.
311 
312     Collective
313 
314     Input Parameters:
315 +   comm     - processors accessing the file
316 .   url      - name of file, including entire URL (with or without .gz)
317 -   llen     - length of localname
318 
319     Output Parameters:
320 +   localname - name of local copy of file - valid on only process zero
321 -   found - if found or retrieved the file - valid on all processes
322 
323     Note:
324     if the file already exists local this function just returns without downloading it.
325 
326     Level: intermediate
327 @*/
328 PetscErrorCode PetscFileRetrieve(MPI_Comm comm, const char url[], char localname[], size_t llen, PetscBool *found)
329 {
330   char        buffer[PETSC_MAX_PATH_LEN], *par, *tlocalname, name[PETSC_MAX_PATH_LEN];
331   FILE       *fp;
332   PetscMPIInt rank;
333   size_t      len = 0;
334   PetscBool   flg1, flg2, flg3, flg4, download, compressed = PETSC_FALSE;
335 
336   PetscFunctionBegin;
337   PetscCallMPI(MPI_Comm_rank(comm, &rank));
338   if (rank == 0) {
339     *found = PETSC_FALSE;
340 
341     PetscCall(PetscStrstr(url, ".gz", &par));
342     if (par) {
343       PetscCall(PetscStrlen(par, &len));
344       if (len == 3) compressed = PETSC_TRUE;
345     }
346 
347     PetscCall(PetscStrncmp(url, "ftp://", 6, &flg1));
348     PetscCall(PetscStrncmp(url, "http://", 7, &flg2));
349     PetscCall(PetscStrncmp(url, "file://", 7, &flg3));
350     PetscCall(PetscStrncmp(url, "https://", 8, &flg4));
351     download = (PetscBool)(flg1 || flg2 || flg3 || flg4);
352 
353     if (!download && !compressed) {
354       PetscCall(PetscStrncpy(localname, url, llen));
355       PetscCall(PetscTestFile(url, 'r', found));
356       if (*found) {
357         PetscCall(PetscInfo(NULL, "Found file %s\n", url));
358       } else {
359         PetscCall(PetscInfo(NULL, "Did not find file %s\n", url));
360       }
361       goto done;
362     }
363 
364     /* look for uncompressed file in requested directory */
365     if (compressed) {
366       PetscCall(PetscStrncpy(localname, url, llen));
367       PetscCall(PetscStrstr(localname, ".gz", &par));
368       *par = 0; /* remove .gz extension */
369       PetscCall(PetscTestFile(localname, 'r', found));
370       if (*found) goto done;
371     }
372 
373     /* look for file in current directory */
374     PetscCall(PetscStrrchr(url, '/', &tlocalname));
375     PetscCall(PetscStrncpy(localname, tlocalname, llen));
376     if (compressed) {
377       PetscCall(PetscStrstr(localname, ".gz", &par));
378       *par = 0; /* remove .gz extension */
379     }
380     PetscCall(PetscTestFile(localname, 'r', found));
381     if (*found) goto done;
382 
383     if (download) {
384       /* local file is not already here so use curl to get it */
385       PetscCall(PetscStrncpy(localname, tlocalname, llen));
386       PetscCall(PetscStrcpy(buffer, "curl --fail --silent --show-error "));
387       PetscCall(PetscStrcat(buffer, url));
388       PetscCall(PetscStrcat(buffer, " > "));
389       PetscCall(PetscStrcat(buffer, localname));
390 #if defined(PETSC_HAVE_POPEN)
391       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
392       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
393 #else
394       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
395 #endif
396       PetscCall(PetscTestFile(localname, 'r', found));
397       if (*found) {
398         FILE *fd;
399         char  buf[1024], *str, *substring;
400 
401         /* check if the file didn't exist so it downloaded an HTML message instead */
402         fd = fopen(localname, "r");
403         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscTestFile() indicates %s exists but fopen() cannot open it", localname);
404         str = fgets(buf, sizeof(buf) - 1, fd);
405         while (str) {
406           PetscCall(PetscStrstr(buf, "<!DOCTYPE html>", &substring));
407           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
408           PetscCall(PetscStrstr(buf, "Not Found", &substring));
409           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
410           str = fgets(buf, sizeof(buf) - 1, fd);
411         }
412         fclose(fd);
413       }
414     } else if (compressed) {
415       PetscCall(PetscTestFile(url, 'r', found));
416       if (!*found) goto done;
417       PetscCall(PetscStrncpy(localname, url, llen));
418     }
419     if (compressed) {
420       PetscCall(PetscStrrchr(localname, '/', &tlocalname));
421       PetscCall(PetscStrncpy(name, tlocalname, PETSC_MAX_PATH_LEN));
422       PetscCall(PetscStrstr(name, ".gz", &par));
423       *par = 0; /* remove .gz extension */
424       /* uncompress file */
425       PetscCall(PetscStrcpy(buffer, "gzip -c -d "));
426       PetscCall(PetscStrcat(buffer, localname));
427       PetscCall(PetscStrcat(buffer, " > "));
428       PetscCall(PetscStrcat(buffer, name));
429 #if defined(PETSC_HAVE_POPEN)
430       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
431       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
432 #else
433       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
434 #endif
435       PetscCall(PetscStrncpy(localname, name, llen));
436       PetscCall(PetscTestFile(localname, 'r', found));
437     }
438   }
439 done:
440   PetscCallMPI(MPI_Bcast(found, 1, MPIU_BOOL, 0, comm));
441   PetscCallMPI(MPI_Bcast(localname, llen, MPI_CHAR, 0, comm));
442   PetscFunctionReturn(0);
443 }
444