gawk: Internal File Ops

 
 16.6.2 C Code for 'chdir()' and 'stat()'
 ----------------------------------------
 
 Here is the C code for these extensions.(1)
 
    The file includes a number of standard header files, and then
 includes the 'gawkapi.h' header file, which provides the API
 definitions.  Those are followed by the necessary variable declarations
 to make use of the API macros and boilerplate code (see Extension API
 Boilerplate):
 
      #ifdef HAVE_CONFIG_H
      #include <config.h>
      #endif
 
      #include <stdio.h>
      #include <assert.h>
      #include <errno.h>
      #include <stdlib.h>
      #include <string.h>
      #include <unistd.h>
 
      #include <sys/types.h>
      #include <sys/stat.h>
 
      #include "gawkapi.h"
 
      #include "gettext.h"
      #define _(msgid)  gettext(msgid)
      #define N_(msgid) msgid
 
      #include "gawkfts.h"
      #include "stack.h"
 
      static const gawk_api_t *api;    /* for convenience macros to work */
      static awk_ext_id_t ext_id;      /* extension id; passed to warning() below */
      static awk_bool_t init_filefuncs(void);  /* forward declaration; defined later */
      /* initialization hook: points at this extension's own init routine */
      static awk_bool_t (*init_func)(void) = init_filefuncs;
      static const char *ext_version = "filefuncs extension: version 1.0";
 
      /* NOTE(review): presumably a marker symbol that gawk looks for when
         loading the extension --- confirm against the API documentation */
      int plugin_is_GPL_compatible;
 
    By convention, for an 'awk' function 'foo()', the C function that
 implements it is called 'do_foo()'.  The function should have three
 arguments.  The first is an 'int', usually called 'nargs', that
 represents the number of actual arguments for the function.  The second
 is a pointer to an 'awk_value_t' structure, usually named 'result'.  The
 third is a pointer to a 'struct awk_ext_func', which is unused here:
 
      /*  do_chdir --- provide dynamically loaded chdir() function for gawk */
 
      static awk_value_t *
      do_chdir(int nargs, awk_value_t *result, struct awk_ext_func *unused)
      {
          awk_value_t newdir;
          int ret = -1;
 
          assert(result != NULL);
 
    The 'newdir' variable represents the new directory to change to,
 which is retrieved with 'get_argument()'.  Note that the first argument
 is numbered zero.
 
    If the argument is retrieved successfully, the function calls the
 'chdir()' system call.  If the 'chdir()' call then fails, it updates
 'ERRNO':
 
          if (get_argument(0, AWK_STRING, & newdir)) {
              ret = chdir(newdir.str_value.str);
              if (ret < 0)
                  update_ERRNO_int(errno);
          }
 
    Finally, the function returns the return value to the 'awk' level:
 
          return make_number(ret, result);
      }
 
    The 'stat()' extension is more involved.  First comes a function that
 turns a numeric mode into a printable representation (e.g., octal '0644'
 becomes '-rw-r--r--').  This is omitted here for brevity:
 
      /* format_mode --- turn a stat mode field into something readable */
 
      static char *
      format_mode(unsigned long fmode)
      {
          ...
      }
 
    Next comes a function for reading symbolic links, which is also
 omitted here for brevity:
 
      /* read_symlink --- read a symbolic link into an allocated buffer.
         ... */
 
      static char *
      read_symlink(const char *fname, size_t bufsize, ssize_t *linksize)
      {
          ...
      }
 
    Two helper functions simplify entering values in the array that will
 contain the result of the 'stat()':
 
      /* array_set --- store VALUE in ARRAY under the string subscript SUB */

      static void
      array_set(awk_array_t array, const char *sub, awk_value_t *value)
      {
          awk_value_t subscript;

          /* wrap the C string as an awk string value to act as the index */
          make_const_string(sub, strlen(sub), & subscript);

          set_array_element(array, & subscript, value);
      }
 
      /* array_set_numeric --- store the number NUM in ARRAY at subscript SUB */

      static void
      array_set_numeric(awk_array_t array, const char *sub, double num)
      {
          awk_value_t numval;

          /* build the numeric awk value first, then hand it to array_set() */
          make_number(num, & numval);
          array_set(array, sub, & numval);
      }
 
    The following function does most of the work to fill in the
 'awk_array_t' result array with values obtained from a valid 'struct
 stat'.  This work is done in a separate function to support the 'stat()'
 function for 'gawk' and also to support the 'fts()' extension, which is
 included in the same file but whose code is not shown here (See
 Extension Sample File Functions).
 
    The first part of the function is variable declarations, including a
 table to map file types to strings:
 
      /* fill_stat_array --- do the work to fill an array with stat info */
 
      static int
      fill_stat_array(const char *name, awk_array_t array, struct stat *sbuf)
      {
          char *pmode;    /* printable mode */
          const char *type = "unknown";
          awk_value_t tmp;
          static struct ftype_map {
              unsigned int mask;
              const char *type;
          } ftype_map[] = {
              { S_IFREG, "file" },
              { S_IFBLK, "blockdev" },
              { S_IFCHR, "chardev" },
              { S_IFDIR, "directory" },
      #ifdef S_IFSOCK
              { S_IFSOCK, "socket" },
      #endif
      #ifdef S_IFIFO
              { S_IFIFO, "fifo" },
      #endif
      #ifdef S_IFLNK
              { S_IFLNK, "symlink" },
      #endif
      #ifdef S_IFDOOR /* Solaris weirdness */
              { S_IFDOOR, "door" },
      #endif
          };
          int j, k;
 
    The destination array is cleared, and then code fills in various
 elements based on values in the 'struct stat':
 
          /* empty out the array */
          clear_array(array);
 
          /* fill in the array */
          array_set(array, "name", make_const_string(name, strlen(name),
                                                     & tmp));
          array_set_numeric(array, "dev", sbuf->st_dev);
          array_set_numeric(array, "ino", sbuf->st_ino);
          array_set_numeric(array, "mode", sbuf->st_mode);
          array_set_numeric(array, "nlink", sbuf->st_nlink);
          array_set_numeric(array, "uid", sbuf->st_uid);
          array_set_numeric(array, "gid", sbuf->st_gid);
          array_set_numeric(array, "size", sbuf->st_size);
          array_set_numeric(array, "blocks", sbuf->st_blocks);
          array_set_numeric(array, "atime", sbuf->st_atime);
          array_set_numeric(array, "mtime", sbuf->st_mtime);
          array_set_numeric(array, "ctime", sbuf->st_ctime);
 
          /* for block and character devices, add rdev,
             major and minor numbers */
          if (S_ISBLK(sbuf->st_mode) || S_ISCHR(sbuf->st_mode)) {
              array_set_numeric(array, "rdev", sbuf->st_rdev);
              array_set_numeric(array, "major", major(sbuf->st_rdev));
              array_set_numeric(array, "minor", minor(sbuf->st_rdev));
          }
 
    The latter part of the function makes selective additions to the
 destination array, depending upon the availability of certain members
 and/or the type of the file.  It then returns zero, for success:
 
      #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
          array_set_numeric(array, "blksize", sbuf->st_blksize);
      #endif
 
          pmode = format_mode(sbuf->st_mode);
          array_set(array, "pmode", make_const_string(pmode, strlen(pmode),
                                                      & tmp));
 
          /* for symbolic links, add a linkval field */
          if (S_ISLNK(sbuf->st_mode)) {
              char *buf;
              ssize_t linksize;
 
              if ((buf = read_symlink(name, sbuf->st_size,
                          & linksize)) != NULL)
                  array_set(array, "linkval",
                            make_malloced_string(buf, linksize, & tmp));
              else
                  warning(ext_id, _("stat: unable to read symbolic link `%s'"),
                          name);
          }
 
          /* add a type field */
          type = "unknown";   /* shouldn't happen */
          for (j = 0, k = sizeof(ftype_map)/sizeof(ftype_map[0]); j < k; j++) {
              if ((sbuf->st_mode & S_IFMT) == ftype_map[j].mask) {
                  type = ftype_map[j].type;
                  break;
              }
          }
 
          array_set(array, "type", make_const_string(type, strlen(type), & tmp));
 
          return 0;
      }
 
    The third argument to 'stat()' was not discussed previously.  This
 argument is optional.  If present, it causes 'do_stat()' to use the
 'stat()' system call instead of the 'lstat()' system call.  This is done
 by using a function pointer: 'statfunc'.  'statfunc' is initialized to
 point to 'lstat()' (instead of 'stat()') to get the file information, in
 case the file is a symbolic link.  However, if the third argument is
 included, 'statfunc' is set to point to 'stat()', instead.
 
    Here is the 'do_stat()' function, which starts with variable
 declarations and argument checking:
 
      /* do_stat --- provide a stat() function for gawk */
 
      static awk_value_t *
      do_stat(int nargs, awk_value_t *result, struct awk_ext_func *unused)
      {
          awk_value_t file_param, array_param;
          char *name;
          awk_array_t array;
          int ret;
          struct stat sbuf;
          /* default is lstat() */
          int (*statfunc)(const char *path, struct stat *sbuf) = lstat;
 
          assert(result != NULL);
 
    Then comes the actual work.  First, the function gets the arguments.
 Next, it gets the information for the file.  If the called function
 ('lstat()' or 'stat()') returns an error, the code sets 'ERRNO' and
 returns:
 
          /* file is first arg, array to hold results is second */
          if (   ! get_argument(0, AWK_STRING, & file_param)
              || ! get_argument(1, AWK_ARRAY, & array_param)) {
              warning(ext_id, _("stat: bad parameters"));
              return make_number(-1, result);
          }
 
          if (nargs == 3) {
              statfunc = stat;
          }
 
          name = file_param.str_value.str;
          array = array_param.array_cookie;
 
          /* always empty out the array */
          clear_array(array);
 
          /* stat the file; if error, set ERRNO and return */
          ret = statfunc(name, & sbuf);
          if (ret < 0) {
              update_ERRNO_int(errno);
              return make_number(ret, result);
          }
 
    The tedious work is done by 'fill_stat_array()', shown earlier.  When
 done, the function returns the result from 'fill_stat_array()':
 
          ret = fill_stat_array(name, array, & sbuf);
 
          return make_number(ret, result);
      }
 
    Finally, it's necessary to provide the "glue" that loads the new
 function(s) into 'gawk'.
 
    The 'filefuncs' extension also provides an 'fts()' function, which we
 omit here (see Extension Sample File Functions).  For its sake,
 there is an initialization function:
 
      /* init_filefuncs --- initialization routine */
 
      static awk_bool_t
      init_filefuncs(void)
      {
          ...
      }
 
    We are almost done.  We need an array of 'awk_ext_func_t' structures
 for loading each function into 'gawk':
 
      static awk_ext_func_t func_table[] = {
          { "chdir", do_chdir, 1, 1, awk_false, NULL },
          { "stat",  do_stat, 3, 2, awk_false, NULL },
          ...
      };
 
    Each extension must have a routine named 'dl_load()' to load
 everything that needs to be loaded.  It is simplest to use the
 'dl_load_func()' macro in 'gawkapi.h':
 
      /* define the dl_load() function using the boilerplate macro */
 
      dl_load_func(func_table, filefuncs, "")
 
    And that's it!
 
    ---------- Footnotes ----------
 
    (1) This version is edited slightly for presentation.  See
 'extension/filefuncs.c' in the 'gawk' distribution for the complete
 version.