Faster pdfTeX Startup

/

TL;DR: pdfTeX may spend over 100 ms loading texmf-var/fonts/map/pdftex/updmap/pdftex.map and texmf-dist/ls-R at startup in a full TeX Live install, which is the default. You can skip the default font map file with \pdfmapfile{custom.map}, where custom.map includes all fonts you use. Alternatively, you can interpose fopen with LD_PRELOAD to also avoid the bloated ls-R.


pdfTeX has very high startup overhead: it can take tens of milliseconds to compile a few pages, yet each extra page costs under 1 ms. To better understand what it does at startup, I traced fopen and fclose calls with LD_PRELOAD, logging timestamps and paths; ls-R and pdftex.map immediately stood out. Both are huge text files, over 5 MiB in size, which partly explains why loading them takes so long. Moreover, most of their contents are unnecessary for pdfTeX to do its job: ls-R is a recursive list of the files in texmf-dist, and pdftex.map contains font information. Modifying fopen to use stripped-down versions instead (see below) lowers this overhead to less than 1 ms.

// $ gcc -fPIC -shared -O2 preload.c -o preload.so
// $ LD_PRELOAD=./preload.so pdftex gentle.tex >/dev/null

#include <dlfcn.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

// Singly linked record pairing a stream handle with the path it was
// opened under, so the fclose wrapper can report which file is closing.
struct node {
    struct node *next;  // following record in the list, or NULL at the tail
    char *path;         // path string associated with the stream
    FILE *fptr;         // stream handle; used as the lookup key
};

// Head of the list of recorded streams; new records are pushed at the front.
static struct node *open_files = NULL;

// Returns the nanosecond field of the calling thread's CPU-time clock.
//
// Only tv_nsec is returned; whole seconds are discarded, so the value
// wraps back towards zero after every second of CPU time. That is enough
// for comparing nearby log lines during a short run, but it is not an
// absolute timestamp.
//
// Returns 0 if the clock cannot be read, rather than reading an
// uninitialized timespec.
static unsigned int time_ns(void) {
    struct timespec t = {0};
    if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t) != 0) {
        return 0;
    }
    return (unsigned int)t.tv_nsec;
}

// Interposed fopen, intended to be loaded with LD_PRELOAD.
//
// Redirects the two large TeX Live files (ls-R and pdftex.map) to
// stripped-down replacements, logs the call with a timestamp to stderr,
// forwards to the next fopen in the symbol lookup order, and records the
// resulting stream together with its (possibly redirected) path in
// open_files so the fclose wrapper can name the file being closed.
//
// path, mode: same meaning as for the standard fopen.
// Returns whatever the real fopen returns, or NULL if the real fopen
// cannot be resolved. Tracking is best effort: if the bookkeeping
// allocation fails the stream is still returned, just not recorded.
FILE *fopen(char const *path, char const *mode) {
    static FILE *(*real_fopen)(char const *, char const *);
    if (!real_fopen) real_fopen = dlsym(RTLD_NEXT, "fopen");
    if (!real_fopen) return NULL;

    // strcmp and "%s" need valid strings; hand odd calls straight through.
    if (!path || !mode) return real_fopen(path, mode);

    if (!strcmp(path, "/.../texmf-dist/ls-R"))
        path = "/.../custom/ls-R";
    if (!strcmp(path, "/.../texmf-var/fonts/map/pdftex/updmap/pdftex.map"))
        path = "/.../custom/pdftex.map";

    fprintf(stderr, "%09u fopen  %s, %s\n", time_ns(), path, mode);
    FILE *ret = real_fopen(path, mode);

    // A failed open has no stream to track. Returning right away also
    // leaves the errno set by the real fopen untouched for the caller.
    if (!ret) return NULL;

    struct node *head = malloc(sizeof *head);
    if (!head) return ret;

    head->path = strdup(path);
    if (!head->path) {
        free(head);
        return ret;
    }
    head->fptr = ret;
    head->next = open_files;
    open_files = head;

    return ret;
}

// Interposed fclose, intended to be loaded with LD_PRELOAD.
//
// Looks the stream up in open_files, logs the close with a timestamp and
// the recorded path ("unknown" if the stream was never recorded), drops
// the record from the list, and forwards to the next fclose in the symbol
// lookup order.
//
// stream: same meaning as for the standard fclose.
// Returns whatever the real fclose returns, or EOF if the real fclose
// cannot be resolved.
int fclose(FILE *stream) {
    static int (*real_fclose)(FILE *);
    if (!real_fclose) real_fclose = dlsym(RTLD_NEXT, "fclose");
    if (!real_fclose) return EOF;

    // Walk with a pointer to the incoming link so the match can be
    // unlinked without special-casing the list head.
    struct node **link = &open_files;
    while (*link && (*link)->fptr != stream) {
        link = &(*link)->next;
    }
    struct node *hit = *link;

    char const *path = (hit && hit->path) ? hit->path : "unknown";
    fprintf(stderr, "%09u fclose %s\n", time_ns(), path);

    // Release the record once it has been logged. Without this the list
    // and its path copies grow for the lifetime of the process, and a
    // FILE address reused by a later open would leave stale entries behind.
    if (hit) {
        *link = hit->next;
        free(hit->path);
        free(hit);
    }

    return real_fclose(stream);
}