Help us understand the problem. What is going on with this article?

俺のlsが遅すぎる

More than 1 year has passed since last update.

なぜ?

必要に迫られてオレオレなlsを書いたのだがなぜか遅い。大したことしていないのに遅い。

コード

特に変なところはないと思うのだけど...

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <dirent.h>
#include <pwd.h>

int main(int argc, char** argv)
{
    char fpath[PATH_MAX];
    DIR* dir;
    struct dirent* dp;
    struct stat st;
    struct passwd *pw;
    char *dpath;

    dpath = (argc != 2) ? "." : argv[1];

    dir = opendir(dpath);
    while ((dp = readdir(dir)) != NULL) {
        memset(fpath, '\0', sizeof(fpath));
        sprintf(fpath, "%s/%s", dpath, dp->d_name);
        if (stat(fpath, &st) != -1) {
            pw = getpwuid(st.st_uid);
            fprintf(stdout, "{ filename = %s, username = %s }\n", fpath, pw->pw_name);
        }
    }
    closedir(dir);
    exit(EXIT_SUCCESS);
}

ベンチマーク

ゴミファイルを1万作っておく。

$ mkdir too_many_files.d
$ cd too_many_files.d
$ for x in `seq 1000`
> do
> touch testfile-${x}-{0,1,2,3,4,5,6,7,8,9}
> done
$ ls | wc -l
10000

なぜか遅い。lsは魔法でも使ってんのかよ。

$ time ./minls too_many_files.d/ > /dev/null

real    0m0.149s
user    0m0.063s
sys     0m0.086s

$ time ls too_many_files.d/ > /dev/null

real    0m0.046s
user    0m0.043s
sys     0m0.003s

調査

こういうのは自分のコードを疑うべきですから、ltraceで関数呼び出しを調べてましょうか。

$ ltrace -c ./minls too_many_files.d/ > /dev/null
% time     seconds  usecs/call     calls      function
------ ----------- ----------- --------- --------------------
 53.28    6.174203     6174203         1 __libc_start_main
 13.86    1.605601         160     10002 getpwuid
  7.42    0.860083          85     10002 __xstat
  6.52    0.755191          75     10002 fprintf
  6.35    0.736116          73     10002 sprintf
  6.31    0.731490          73     10003 readdir
  6.25    0.724347          72     10002 memset
  0.00    0.000177         177         1 exit
  0.00    0.000174         174         1 opendir
  0.00    0.000089          89         1 closedir
  0.00    0.000084          84         1 exit_group
------ ----------- ----------- --------- --------------------
100.00   11.587555                 60018 total

おおぅ、getpwuid()がめっちゃ遅い。
こいつが遅いのがわかったので、straceをかけてみる。

$ strace -f -e open ./minls .
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
open("/etc/nsswitch.conf", O_RDONLY|O_CLOEXEC) = 4
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 4
open("/lib64/libnss_files.so.2", O_RDONLY|O_CLOEXEC) = 4
open("/etc/passwd", O_RDONLY|O_CLOEXEC) = 4
{ filename = ./., username = dharry }
open("/etc/passwd", O_RDONLY|O_CLOEXEC) = 4
{ filename = ./.., username = dharry }
open("/etc/passwd", O_RDONLY|O_CLOEXEC) = 4
{ filename = ./minls, username = dharry }
open("/etc/passwd", O_RDONLY|O_CLOEXEC) = 4
{ filename = ./test.c, username = dharry }
open("/etc/passwd", O_RDONLY|O_CLOEXEC) = 4
{ filename = ./too_many_files.d, username = dharry }
+++ exited with 0 +++

つまり、getpwuid()を呼び出すたびに/etc/passwdをみているのか。これきっつい。

core-utilsのlsは何してんの?

getpwuid()を呼ぶたびに、パスワードファイル参照されるのは、あまり精神衛生的に気持ちのいいものじゃないね。普通に考えるとキャッシュする実装がいいのかな。

core-utils の ls でもキャッシュしていると。

idcache.c
struct userid
{
  union
    {
      uid_t u;
      gid_t g;
    } id;
  char *name;
  struct userid *next;
};

static struct userid *user_alist;

/* The members of this list have names not in the local passwd file.  */
static struct userid *nouser_alist;

/* Translate UID to a login name, with cache, or NULL if unresolved.  */

char *
getuser (uid_t uid)
{
  register struct userid *tail;
  struct passwd *pwent;

  for (tail = user_alist; tail; tail = tail->next)
    if (tail->id.u == uid)
      return tail->name;

  pwent = getpwuid (uid);
  tail = xmalloc (sizeof *tail);
  tail->id.u = uid;
  tail->name = pwent ? xstrdup (pwent->pw_name) : NULL;

  /* Add to the head of the list, so most recently used is first.  */
  tail->next = user_alist;
  user_alist = tail;
  return tail->name;
}
Why not register and get more from Qiita?
  1. We will deliver articles that match you
    By following users and tags, you can catch up information on technical fields that you are interested in as a whole
  2. you can read useful information later efficiently
    By "stocking" the articles you like, you can search right away
Comments
No comments
Sign up for free and join this conversation.
If you already have a Qiita account
Why do not you register as a user and use Qiita more conveniently?
You need to log in to use this function. Qiita can be used more conveniently after logging in.
You seem to be reading articles frequently this month. Qiita can be used more conveniently after logging in.
  1. We will deliver articles that match you
    By following users and tags, you can catch up information on technical fields that you are interested in as a whole
  2. you can read useful information later efficiently
    By "stocking" the articles you like, you can search right away
ユーザーは見つかりませんでした