xref: /petsc/src/sys/fileio/fretrieve.c (revision 1b37a2a7cc4a4fb30c3e967db1c694c0a1013f51)
1 /*
2       Code for opening and closing files.
3 */
4 #include <petscsys.h>
5 #if defined(PETSC_HAVE_PWD_H)
6   #include <pwd.h>
7 #endif
8 #include <ctype.h>
9 #include <sys/stat.h>
10 #if defined(PETSC_HAVE_UNISTD_H)
11   #include <unistd.h>
12 #endif
13 #if defined(PETSC_HAVE_SYS_UTSNAME_H)
14   #include <sys/utsname.h>
15 #endif
16 #include <fcntl.h>
17 #include <time.h>
18 #if defined(PETSC_HAVE_SYS_SYSTEMINFO_H)
19   #include <sys/systeminfo.h>
20 #endif
21 #include <petsc/private/petscimpl.h>
22 
23 /*
24    Private routine to delete tmp/shared storage
25 
26    This is called by MPI, not by users, when communicator attributes are deleted
27 
28    Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
29 
30 */
31 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_DelTmpShared(MPI_Comm comm, PetscMPIInt keyval, void *count_val, void *extra_state)
32 {
33   PetscFunctionBegin;
34   PetscCallMPI(PetscInfo(NULL, "Deleting tmp/shared data in an MPI_Comm %ld\n", (long)comm));
35   PetscCallMPI(PetscFree(count_val));
36   PetscFunctionReturn(MPI_SUCCESS);
37 }
38 
39 // "Unknown section 'Environmental Variables'"
40 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
41 /*@C
42   PetscGetTmp - Gets the name of the "tmp" directory, often this is `/tmp`
43 
44   Collective
45 
46   Input Parameters:
47 + comm - MPI_Communicator that may share tmp
48 - len  - length of string to hold name
49 
50   Output Parameter:
51 . dir - directory name
52 
53   Options Database Keys:
54 + -shared_tmp     - indicates the directory is known to be shared among the MPI processes
55 . -not_shared_tmp - indicates the directory is known to be not shared among the MPI processes
56 - -tmp tmpdir     - name of the directory you wish to use as tmp
57 
58   Environmental Variables:
59 + `PETSC_SHARED_TMP`     - indicates the directory is known to be shared among the MPI processes
60 . `PETSC_NOT_SHARED_TMP` - indicates the directory is known to be not shared among the MPI processes
61 - `PETSC_TMP`            - name of the directory you wish to use as tmp
62 
63   Level: developer
64 
65 .seealso: `PetscSharedTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
66 @*/
67 PetscErrorCode PetscGetTmp(MPI_Comm comm, char dir[], size_t len)
68 {
69   PetscBool flg;
70 
71   PetscFunctionBegin;
72   PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", dir, len, &flg));
73   if (!flg) PetscCall(PetscStrncpy(dir, "/tmp", len));
74   PetscFunctionReturn(PETSC_SUCCESS);
75 }
76 
77 // "Unknown section 'Environmental Variables'"
78 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
79 /*@C
80   PetscSharedTmp - Determines if all processors in a communicator share a
81   tmp directory or have different ones.
82 
83   Collective
84 
85   Input Parameter:
86 . comm - MPI_Communicator that may share tmp
87 
88   Output Parameter:
89 . shared - `PETSC_TRUE` or `PETSC_FALSE`
90 
91   Options Database Keys:
92 + -shared_tmp     - indicates the directory is known to be shared among the MPI processes
93 . -not_shared_tmp - indicates the directory is known to be not shared among the MPI processes
94 - -tmp tmpdir     - name of the directory you wish to use as tmp
95 
96   Environmental Variables:
97 + `PETSC_SHARED_TMP`     - indicates the directory is known to be shared among the MPI processes
98 . `PETSC_NOT_SHARED_TMP` - indicates the directory is known to be not shared among the MPI processes
99 - `PETSC_TMP`            - name of the directory you wish to use as tmp
100 
101   Level: developer
102 
103   Notes:
104   Stores the status as a MPI attribute so it does not have
105   to be redetermined each time.
106 
107   Assumes that all processors in a communicator either
108   1) have a common tmp or
109   2) each has a separate tmp
110   eventually we can write a fancier one that determines which processors
111   share a common tmp.
112 
113   This will be very slow on runs with a large number of processors since
114   it requires O(p*p) file opens.
115 
116   If the environmental variable `PETSC_TMP` is set it will use this directory
117   as the "tmp" directory.
118 
119 .seealso: `PetscGetTmp()`, `PetscSharedWorkingDirectory()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
120 @*/
121 PetscErrorCode PetscSharedTmp(MPI_Comm comm, PetscBool *shared)
122 {
123   PetscMPIInt size, rank, *tagvalp, sum, cnt, i;
124   PetscBool   flg, iflg;
125   FILE       *fd;
126   int         err;
127 
128   PetscFunctionBegin;
129   PetscCallMPI(MPI_Comm_size(comm, &size));
130   if (size == 1) {
131     *shared = PETSC_TRUE;
132     PetscFunctionReturn(PETSC_SUCCESS);
133   }
134 
135   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_TMP", NULL, 0, &flg));
136   if (flg) {
137     *shared = PETSC_TRUE;
138     PetscFunctionReturn(PETSC_SUCCESS);
139   }
140 
141   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_TMP", NULL, 0, &flg));
142   if (flg) {
143     *shared = PETSC_FALSE;
144     PetscFunctionReturn(PETSC_SUCCESS);
145   }
146 
147   if (Petsc_SharedTmp_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_SharedTmp_keyval, NULL));
148 
149   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_SharedTmp_keyval, (void **)&tagvalp, (int *)&iflg));
150   if (!iflg) {
151     char filename[PETSC_MAX_PATH_LEN], tmpname[PETSC_MAX_PATH_LEN];
152 
153     /* This communicator does not yet have a shared tmp attribute */
154     PetscCall(PetscMalloc1(1, &tagvalp));
155     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_SharedTmp_keyval, tagvalp));
156 
157     PetscCall(PetscOptionsGetenv(comm, "PETSC_TMP", tmpname, 238, &iflg));
158     if (!iflg) {
159       PetscCall(PetscStrncpy(filename, "/tmp", sizeof(filename)));
160     } else {
161       PetscCall(PetscStrncpy(filename, tmpname, sizeof(filename)));
162     }
163 
164     PetscCall(PetscStrlcat(filename, "/petsctestshared", sizeof(filename)));
165     PetscCallMPI(MPI_Comm_rank(comm, &rank));
166 
167     /* each processor creates a /tmp file and all the later ones check */
168     /* this makes sure no subset of processors is shared */
169     *shared = PETSC_FALSE;
170     for (i = 0; i < size - 1; i++) {
171       if (rank == i) {
172         fd = fopen(filename, "w");
173         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
174         err = fclose(fd);
175         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
176       }
177       PetscCallMPI(MPI_Barrier(comm));
178       if (rank >= i) {
179         fd = fopen(filename, "r");
180         if (fd) cnt = 1;
181         else cnt = 0;
182         if (fd) {
183           err = fclose(fd);
184           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
185         }
186       } else cnt = 0;
187 
188       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
189       if (rank == i) unlink(filename);
190 
191       if (sum == size) {
192         *shared = PETSC_TRUE;
193         break;
194       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share /tmp ");
195     }
196     *tagvalp = (int)*shared;
197     PetscCall(PetscInfo(NULL, "processors %s %s\n", (*shared) ? "share" : "do NOT share", (iflg ? tmpname : "/tmp")));
198   } else *shared = (PetscBool)*tagvalp;
199   PetscFunctionReturn(PETSC_SUCCESS);
200 }
201 
202 // "Unknown section 'Environmental Variables'"
203 // PetscClangLinter pragma disable: -fdoc-section-header-unknown
204 /*@C
205   PetscSharedWorkingDirectory - Determines if all processors in a communicator share a working directory or have different ones.
206 
207   Collective
208 
209   Input Parameter:
210 . comm - MPI_Communicator that may share working directory
211 
212   Output Parameter:
213 . shared - `PETSC_TRUE` or `PETSC_FALSE`
214 
215   Options Database Keys:
216 + -shared_working_directory     - indicates the directory is known to be shared among the MPI processes
217 - -not_shared_working_directory - indicates the directory is known to be not shared among the MPI processes
218 
219   Environmental Variables:
220 + `PETSC_SHARED_WORKING_DIRECTORY`     - indicates the directory is known to be shared among the MPI processes
221 - `PETSC_NOT_SHARED_WORKING_DIRECTORY` - indicates the directory is known to be not shared among the MPI processes
222 
223   Level: developer
224 
225   Notes:
226   Stores the status as a MPI attribute so it does not have to be redetermined each time.
227 
228   Assumes that all processors in a communicator either
229 .vb
230    1) have a common working directory or
231    2) each has a separate working directory
232 .ve
233   eventually we can write a fancier one that determines which processors share a common working directory.
234 
235   This will be very slow on runs with a large number of processors since it requires O(p*p) file opens.
236 
237 .seealso: `PetscGetTmp()`, `PetscSharedTmp()`, `PetscGetWorkingDirectory()`, `PetscGetHomeDirectory()`
238 @*/
239 PetscErrorCode PetscSharedWorkingDirectory(MPI_Comm comm, PetscBool *shared)
240 {
241   PetscMPIInt size, rank, *tagvalp, sum, cnt, i;
242   PetscBool   flg, iflg;
243   FILE       *fd;
244   int         err;
245 
246   PetscFunctionBegin;
247   PetscCallMPI(MPI_Comm_size(comm, &size));
248   if (size == 1) {
249     *shared = PETSC_TRUE;
250     PetscFunctionReturn(PETSC_SUCCESS);
251   }
252 
253   PetscCall(PetscOptionsGetenv(comm, "PETSC_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
254   if (flg) {
255     *shared = PETSC_TRUE;
256     PetscFunctionReturn(PETSC_SUCCESS);
257   }
258 
259   PetscCall(PetscOptionsGetenv(comm, "PETSC_NOT_SHARED_WORKING_DIRECTORY", NULL, 0, &flg));
260   if (flg) {
261     *shared = PETSC_FALSE;
262     PetscFunctionReturn(PETSC_SUCCESS);
263   }
264 
265   if (Petsc_SharedWD_keyval == MPI_KEYVAL_INVALID) PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelTmpShared, &Petsc_SharedWD_keyval, NULL));
266 
267   PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_SharedWD_keyval, (void **)&tagvalp, (int *)&iflg));
268   if (!iflg) {
269     char filename[PETSC_MAX_PATH_LEN];
270 
271     /* This communicator does not yet have a shared  attribute */
272     PetscCall(PetscMalloc1(1, &tagvalp));
273     PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_SharedWD_keyval, tagvalp));
274 
275     PetscCall(PetscGetWorkingDirectory(filename, sizeof(filename) - 16));
276     PetscCall(PetscStrlcat(filename, "/petsctestshared", sizeof(filename)));
277     PetscCallMPI(MPI_Comm_rank(comm, &rank));
278 
279     /* each processor creates a  file and all the later ones check */
280     /* this makes sure no subset of processors is shared */
281     *shared = PETSC_FALSE;
282     for (i = 0; i < size - 1; i++) {
283       if (rank == i) {
284         fd = fopen(filename, "w");
285         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_FILE_OPEN, "Unable to open test file %s", filename);
286         err = fclose(fd);
287         PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
288       }
289       PetscCallMPI(MPI_Barrier(comm));
290       if (rank >= i) {
291         fd = fopen(filename, "r");
292         if (fd) cnt = 1;
293         else cnt = 0;
294         if (fd) {
295           err = fclose(fd);
296           PetscCheck(!err, PETSC_COMM_SELF, PETSC_ERR_SYS, "fclose() failed on file");
297         }
298       } else cnt = 0;
299 
300       PetscCall(MPIU_Allreduce(&cnt, &sum, 1, MPI_INT, MPI_SUM, comm));
301       if (rank == i) unlink(filename);
302 
303       if (sum == size) {
304         *shared = PETSC_TRUE;
305         break;
306       } else PetscCheck(sum == 1, PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Subset of processes share working directory");
307     }
308     *tagvalp = (int)*shared;
309   } else *shared = (PetscBool)*tagvalp;
310   PetscCall(PetscInfo(NULL, "processors %s working directory\n", (*shared) ? "shared" : "do NOT share"));
311   PetscFunctionReturn(PETSC_SUCCESS);
312 }
313 
314 /*@C
315   PetscFileRetrieve - Obtains a file from a URL or a compressed file
316   and copies into local disk space as uncompressed.
317 
318   Collective
319 
320   Input Parameters:
321 + comm - processors accessing the file
322 . url  - name of file, including entire URL (with or without .gz)
323 - llen - length of `localname`
324 
325   Output Parameters:
326 + localname - name of local copy of file - valid on only process zero
327 - found     - if found or retrieved the file - valid on all processes
328 
329   Level: developer
330 
331   Note:
332   if the file already exists locally this function just returns without downloading it.
333 
334 .seealso: `PetscDLLibraryRetrieve()`
335 @*/
336 PetscErrorCode PetscFileRetrieve(MPI_Comm comm, const char url[], char localname[], size_t llen, PetscBool *found)
337 {
338   char        buffer[PETSC_MAX_PATH_LEN], *par = NULL, *tlocalname = NULL, name[PETSC_MAX_PATH_LEN];
339   FILE       *fp;
340   PetscMPIInt rank;
341   size_t      len = 0;
342   PetscBool   flg1, flg2, flg3, flg4, download, compressed = PETSC_FALSE;
343 
344   PetscFunctionBegin;
345   PetscCallMPI(MPI_Comm_rank(comm, &rank));
346   if (rank == 0) {
347     *found = PETSC_FALSE;
348 
349     PetscCall(PetscStrstr(url, ".gz", &par));
350     if (par) {
351       PetscCall(PetscStrlen(par, &len));
352       if (len == 3) compressed = PETSC_TRUE;
353     }
354 
355     PetscCall(PetscStrncmp(url, "ftp://", 6, &flg1));
356     PetscCall(PetscStrncmp(url, "http://", 7, &flg2));
357     PetscCall(PetscStrncmp(url, "file://", 7, &flg3));
358     PetscCall(PetscStrncmp(url, "https://", 8, &flg4));
359     download = (PetscBool)(flg1 || flg2 || flg3 || flg4);
360 
361     if (!download && !compressed) {
362       PetscCall(PetscStrncpy(localname, url, llen));
363       PetscCall(PetscTestFile(url, 'r', found));
364       if (*found) {
365         PetscCall(PetscInfo(NULL, "Found file %s\n", url));
366       } else {
367         PetscCall(PetscInfo(NULL, "Did not find file %s\n", url));
368       }
369       goto done;
370     }
371 
372     /* look for uncompressed file in requested directory */
373     if (compressed) {
374       PetscCall(PetscStrncpy(localname, url, llen));
375       PetscCall(PetscStrstr(localname, ".gz", &par));
376       *par = 0; /* remove .gz extension */
377       PetscCall(PetscTestFile(localname, 'r', found));
378       if (*found) goto done;
379     }
380 
381     /* look for file in current directory */
382     PetscCall(PetscStrrchr(url, '/', &tlocalname));
383     PetscCall(PetscStrncpy(localname, tlocalname, llen));
384     if (compressed) {
385       PetscCall(PetscStrstr(localname, ".gz", &par));
386       *par = 0; /* remove .gz extension */
387     }
388     PetscCall(PetscTestFile(localname, 'r', found));
389     if (*found) goto done;
390 
391     if (download) {
392       /* local file is not already here so use curl to get it */
393       PetscCall(PetscStrncpy(localname, tlocalname, llen));
394       PetscCall(PetscStrncpy(buffer, "curl --fail --silent --show-error ", sizeof(buffer)));
395       PetscCall(PetscStrlcat(buffer, url, sizeof(buffer)));
396       PetscCall(PetscStrlcat(buffer, " > ", sizeof(buffer)));
397       PetscCall(PetscStrlcat(buffer, localname, sizeof(buffer)));
398 #if defined(PETSC_HAVE_POPEN)
399       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
400       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
401 #else
402       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
403 #endif
404       PetscCall(PetscTestFile(localname, 'r', found));
405       if (*found) {
406         FILE *fd;
407         char  buf[1024], *str, *substring;
408 
409         /* check if the file didn't exist so it downloaded an HTML message instead */
410         fd = fopen(localname, "r");
411         PetscCheck(fd, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscTestFile() indicates %s exists but fopen() cannot open it", localname);
412         str = fgets(buf, sizeof(buf) - 1, fd);
413         while (str) {
414           PetscCall(PetscStrstr(buf, "<!DOCTYPE html>", &substring));
415           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
416           PetscCall(PetscStrstr(buf, "Not Found", &substring));
417           PetscCheck(!substring, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unable to download %s it does not appear to exist at this URL, dummy HTML file was downloaded", url);
418           str = fgets(buf, sizeof(buf) - 1, fd);
419         }
420         fclose(fd);
421       }
422     } else if (compressed) {
423       PetscCall(PetscTestFile(url, 'r', found));
424       if (!*found) goto done;
425       PetscCall(PetscStrncpy(localname, url, llen));
426     }
427     if (compressed) {
428       PetscCall(PetscStrrchr(localname, '/', &tlocalname));
429       PetscCall(PetscStrncpy(name, tlocalname, PETSC_MAX_PATH_LEN));
430       PetscCall(PetscStrstr(name, ".gz", &par));
431       *par = 0; /* remove .gz extension */
432       /* uncompress file */
433       PetscCall(PetscStrncpy(buffer, "gzip -c -d ", sizeof(buffer)));
434       PetscCall(PetscStrlcat(buffer, localname, sizeof(buffer)));
435       PetscCall(PetscStrlcat(buffer, " > ", sizeof(buffer)));
436       PetscCall(PetscStrlcat(buffer, name, sizeof(buffer)));
437 #if defined(PETSC_HAVE_POPEN)
438       PetscCall(PetscPOpen(PETSC_COMM_SELF, NULL, buffer, "r", &fp));
439       PetscCall(PetscPClose(PETSC_COMM_SELF, fp));
440 #else
441       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Cannot run external programs on this machine");
442 #endif
443       PetscCall(PetscStrncpy(localname, name, llen));
444       PetscCall(PetscTestFile(localname, 'r', found));
445     }
446   }
447 done:
448   PetscCallMPI(MPI_Bcast(found, 1, MPIU_BOOL, 0, comm));
449   PetscCallMPI(MPI_Bcast(localname, llen, MPI_CHAR, 0, comm));
450   PetscFunctionReturn(PETSC_SUCCESS);
451 }
452