/**
 * =========================================================================
 * File        : file.cpp
 * Project     : 0 A.D.
 * Description : file layer on top of POSIX. avoids the need for
 *             : absolute paths.
 * =========================================================================
 */

// license: GPL; see lib/license.txt

#include "precompiled.h"
#include "file.h"

#include <vector>
#include <algorithm>
#include <string>

#include "lib/posix/posix_filesystem.h"
#include "lib/posix/posix_aio.h"
#include "lib/posix/posix_mman.h"
#include "lib/adts.h"
#include "lib/sysdep/sysdep.h"
#include "lib/byte_order.h"
#include "lib/allocators.h"
#include "file_internal.h"


// register this module's error codes: each one is paired with a
// human-readable description and a numeric third argument (EACCES for
// FILE_ACCESS, -1 for the others).
// NOTE(review): the third argument is presumably the equivalent errno value,
// with -1 meaning "none" - confirm against the ERROR_ASSOCIATE definition.
ERROR_ASSOCIATE(ERR::FILE_ACCESS, "Insufficient access rights to open file", EACCES);
ERROR_ASSOCIATE(ERR::DIR_END, "End of directory reached (no more files)", -1);
ERROR_ASSOCIATE(ERR::FILE_NOT_MAPPED, "File was not mapped", -1);

// rationale for aio, instead of only using mmap:
// - parallelism: instead of just waiting for the transfer to complete,
//   other work can be done in the meantime.
//   example: decompressing from a Zip archive is practically free,
//   because we inflate one block while reading the next.
// - throughput: with aio, the drive always has something to do, as opposed
//   to read requests triggered by the OS for mapped files, which come
//   in smaller chunks. this leads to much higher transfer rates.
// - memory: when used with VFS, aio makes better use of a file cache.
//   data is generally compressed in an archive. a cache should store the
//   decompressed and decoded (e.g. TGA colour swapping) data; mmap would
//   keep the original, compressed data in memory, which doesn't help.
//   we bypass the OS file cache via aio, and store partial blocks here (*);
//   higher level routines will cache the actual useful data.
//   * requests for part of a block are usually followed by another.



// layer on top of POSIX opendir/readdir/closedir that handles
// portable -> native path conversion, ignores non-file/directory entries,
// and additionally returns the file status (size and mtime).

// rationale: see DirIterator definition in header.
// state of one directory iteration. it lives in-place inside
// DirIterator::opaque; dir_open, dir_next_ent and dir_close cast that
// storage back to this type.
struct PosixDirIterator
{
    // directory stream returned by opendir (closed again in dir_close).
    DIR* os_dir;

    // to support stat(), we need to either chdir or store the complete path.
    // the former is unacceptable because it isn't thread-safe. therefore,
    // we latch dir_open's path and append entry name every dir_next_ent call.
    // this is also the storage to which DirEnt.name points!
    // PathPackage avoids repeated memory allocs and strlen() overhead.
    //
    // it can't be stored here directly because then the struct would
    // no longer fit in HDATA; we'll allocate it separately.
    PathPackage* pp;
};

// compile-time check: the struct must fit into the opaque storage that
// DirIterator provides.
cassert(sizeof(PosixDirIterator) <= DIR_ITERATOR_OPAQUE_SIZE);

// source of the PathPackage objects referenced by PosixDirIterator::pp
// (allocated in dir_open, released in dir_close).
static SingleAllocator<PathPackage> pp_allocator;


// prepare to iterate (once) over entries in the given directory.
//
// P_path: portable path of the directory (on Windows, a full native path
//   of the form "X:\..." is also accepted).
// di: receives the iterator state.
//
// if INFO::OK is returned, <di> is ready for subsequent dir_next_ent calls
// and must be freed via dir_close.
// otherwise, a negative error code is returned (path conversion failure,
// ERR::NO_MEM, or the translated errno of opendir); in that case nothing
// remains allocated and dir_close must not be called.
LibError dir_open(const char* P_path, DirIterator* di)
{
    PosixDirIterator* pdi = (PosixDirIterator*)di->opaque;

    char n_path[PATH_MAX];
    // HACK: allow calling with a full (absolute) native path.
    // (required by wdll_ver).
#if OS_WIN
    // (check P_path[0] first so that we never read past the terminator
    // of an empty string.)
    if(P_path[0] != '\0' && P_path[1] == ':' && P_path[2] == '\\')
        strcpy_s(n_path, ARRAY_SIZE(n_path), P_path);
    else
#endif
    {
        // note: copying to n_path and then pp.path is inefficient but
        // more clear/robust. this is only called a few hundred times anyway.
        RETURN_ERR(file_make_full_native_path(P_path, n_path));
    }

    pdi->pp = pp_allocator.alloc();
    if(!pdi->pp)
        WARN_RETURN(ERR::NO_MEM);

    errno = 0;
    pdi->os_dir = opendir(n_path);
    if(!pdi->os_dir)
    {
        // translate errno before doing anything else that might touch it,
        // then give back the PathPackage - the caller won't call dir_close
        // after a failed open, so it would otherwise be leaked.
        const LibError err = LibError_from_errno();
        pp_allocator.release(pdi->pp);
        pdi->pp = 0;
        return err;
    }

    // latch the directory path; dir_next_ent appends entry names to it.
    (void)path_package_set_dir(pdi->pp, n_path);
    return INFO::OK;
}


// retrieve the next entry of a directory opened via dir_open.
//
// di: iterator previously set up by dir_open.
// ent: receives name, size and mtime of the entry. size is -1 for
//   directories. name points into the iterator's PathPackage, so it is
//   presumably only valid until the next dir_next_ent/dir_close call on
//   this iterator - copy it if it is needed for longer.
//
// return ERR::DIR_END if all entries have already been returned once,
// another negative error code, or INFO::OK on success, in which case <ent>
// describes the next (order is unspecified) directory entry.
// "." and ".." as well as entries that are neither regular file nor
// directory are skipped.
LibError dir_next_ent(DirIterator* di, DirEnt* ent)
{
    PosixDirIterator* pdi = (PosixDirIterator*)di->opaque;

    // loop until we find an entry worth returning (or run out of them).
    for(;;)
    {
        // readdir returns 0 both at end-of-directory and on error;
        // errno is the only way to tell them apart.
        errno = 0;
        struct dirent* os_ent = readdir(pdi->os_dir);
        if(!os_ent)
        {
            // no error, just no more entries to return
            if(!errno)
                return ERR::DIR_END;    // NOWARN
            return LibError_from_errno();
        }

        // copy the entry name into our PathPackage: the full path is
        // needed for stat() (#if !OS_WIN) and the copy is what we return
        // as ent.name (the readdir buffer may be overwritten later).
        // bail if that fails - otherwise we'd stat() and return a stale
        // name from a previous call.
        CHECK_ERR(path_package_append_file(pdi->pp, os_ent->d_name));
        const char* name = pdi->pp->end;

        // get file information (mode, size, mtime)
        struct stat s;
#if OS_WIN
        // .. wposix readdir has enough information to return dirent
        //    status directly (much faster than calling stat).
        CHECK_ERR(readdir_stat_np(pdi->os_dir, &s));
#else
        // .. call regular stat() on the full pathname assembled above.
        CHECK_ERR(stat(pdi->pp->path, &s));
#endif

        // skip "undesirable" entries that POSIX readdir returns:
        if(S_ISDIR(s.st_mode))
        {
            // .. dummy directory entries ("." and "..")
            if(name[0] == '.' && (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')))
                continue;

            s.st_size = -1; // our way of indicating it's a directory
        }
        // .. neither dir nor file
        else if(!S_ISREG(s.st_mode))
            continue;

        ent->size  = s.st_size;
        ent->mtime = s.st_mtime;
        ent->name  = name;
        return INFO::OK;
    }
}


// finish a directory iteration begun by dir_open: the iterator's
// PathPackage is returned to its allocator and the directory stream is
// closed.
// returns INFO::OK, or the translated errno if closedir fails.
LibError dir_close(DirIterator* di)
{
    PosixDirIterator* const iter = (PosixDirIterator*)di->opaque;

    // the path storage is given back regardless of whether closing the
    // stream succeeds.
    pp_allocator.release(iter->pp);

    errno = 0;
    const int closedir_ret = closedir(iter->os_dir);
    if(closedir_ret >= 0)
        return INFO::OK;
    return LibError_from_errno();
}


// does the given (portable) path refer to an existing directory?
//
// returns false if stat() fails for the path or if it refers to something
// other than a directory. the latter is unexpected and additionally
// triggers debug_assert.
// NOTE(review): path conversion failure goes through THROW_ERR, which
// presumably throws - that seems at odds with the "no errors raised"
// intent noted below; confirm.
bool dir_exists(const char* P_path)
{
    // modified from file_stat_impl - we don't want errors to be raised here.
    char N_path[PATH_MAX];
    THROW_ERR(file_make_full_native_path(P_path, N_path));

    // if path ends in slash, remove it (required by stat).
    // .. skip this for strings shorter than 2 chars: an empty string has
    //    no last character (indexing it would access N_path[-1]) and a
    //    lone separator would be turned into an empty path.
    const size_t len = strlen(N_path);
    if(len > 1 && path_is_dir_sep(N_path[len-1]))
        N_path[len-1] = '\0';

    struct stat s;
    if(stat(N_path, &s) != 0)
        return false;

    // something exists under that name, but only report success if it
    // really is a directory (previously, release builds returned true for
    // regular files as well because the assert is the only check).
    const bool is_dir = S_ISDIR(s.st_mode) != 0;
    debug_assert(is_dir);
    return is_dir;
}


// create the directory given by the portable path <P_path>.
// returns INFO::ALREADY_EXISTS (without touching anything) if stat()
// succeeds for that path, a negative error code if path conversion fails,
// or else the translated result of mkdir.
LibError dir_create(const char* P_path)
{
    char native_path[PATH_MAX];
    RETURN_ERR(file_make_full_native_path(P_path, native_path));

    // anything already present under this name counts as "exists".
    struct stat unused;
    if(stat(native_path, &unused) == 0)
        return INFO::ALREADY_EXISTS;

    // request read/write/execute permission for user, group and others.
    errno = 0;
    const int mkdir_ret = mkdir(native_path, S_IRWXO|S_IRWXU|S_IRWXG);
    return LibError_from_posix(mkdir_ret);
}


// delete the directory given by the portable path <P_path>, including
// everything in it.
//
// note: we have to recursively empty the directory before it can
// be deleted (required by Windows and POSIX rmdir()).
//
// returns INFO::OK on success. otherwise, the code of the first operation
// that failed is returned (a subsequent failure to close the directory
// iterator does not replace it); entries processed before that point have
// already been deleted.
LibError dir_delete(const char* P_path)
{
    char N_path[PATH_MAX];
    RETURN_ERR(file_make_full_native_path(P_path, N_path));
    // used to assemble the native path of each file to be unlinked.
    PathPackage N_pp;
    RETURN_ERR(path_package_set_dir(&N_pp, N_path));

    DirIterator di;
    RETURN_ERR(dir_open(P_path, &di));

    // result of emptying the directory; anything other than INFO::OK
    // after the loop means we gave up early.
    LibError ret = INFO::OK;

    for(;;)
    {
        DirEnt ent;
        ret = dir_next_ent(&di, &ent);
        if(ret == ERR::DIR_END)
        {
            // all entries processed - that's the success case.
            ret = INFO::OK;
            break;
        }
        if(ret != INFO::OK)
            break;

        if(DIRENT_IS_DIR(&ent))
        {
            // subdirectory: recurse (via portable path).
            char P_subdir[PATH_MAX];
            ret = path_append(P_subdir, P_path, ent.name);
            if(ret == INFO::OK)
                ret = dir_delete(P_subdir);
        }
        else
        {
            // file: remove it (via native path).
            ret = path_package_append_file(&N_pp, ent.name);
            if(ret == INFO::OK)
            {
                errno = 0;
                const int posix_ret = unlink(N_pp.path);
                ret = LibError_from_posix(posix_ret);
            }
        }

        if(ret != INFO::OK)
            break;
    }

    // the iterator must be closed in any case, and before rmdir.
    const LibError close_ret = dir_close(&di);

    // emptying the directory failed: report that (more informative)
    // error rather than whatever dir_close returned.
    if(ret != INFO::OK)
        return ret;
    RETURN_ERR(close_ret);

    errno = 0;
    const int posix_ret = rmdir(N_path);
    return LibError_from_posix(posix_ret);
}


// retrieve status information for the file with portable path <fn>.
// s: receives the result; it is zeroed on error.
// warn_if_failed: passed on to LibError_from_posix together with the
//   return value of stat().
// returns a negative error code if path conversion fails, otherwise
// whatever LibError_from_posix makes of the stat() result.
static LibError file_stat_impl(const char* fn, struct stat* s, bool warn_if_failed = true)
{
    // clear the output up front so that every failure path leaves it zeroed.
    memset(s, 0, sizeof(*s));

    char native_fn[PATH_MAX];
    RETURN_ERR(file_make_full_native_path(fn, native_fn));

    errno = 0;
    const int stat_ret = stat(native_fn, s);
    return LibError_from_posix(stat_ret, warn_if_failed);
}

// get file information for portable path <fn>; <s> is zeroed on error.
// thin wrapper around file_stat_impl with warn_if_failed enabled.
LibError file_stat(const char* fn, struct stat* s)
{
    const bool warn_if_failed = true;
    return file_stat_impl(fn, s, warn_if_failed);
}

// does the given file exist? (implemented via file_stat)
bool file_exists(const char* fn)
{
    struct stat s;
    const bool warn_if_failed = false;
    return file_stat_impl(fn, &s, warn_if_failed) == INFO::OK;
}


// unlink the file with portable path <fn>. this is permanent - be very
// careful with it!
// returns a negative error code if path conversion fails, otherwise the
// translated result of unlink().
LibError file_delete(const char* fn)
{
    char native_fn[PATH_MAX+1];
    RETURN_ERR(file_make_full_native_path(fn, native_fn));

    errno = 0;
    const int unlink_ret = unlink(native_fn);
    return LibError_from_posix(unlink_ret);
}


///////////////////////////////////////////////////////////////////////////////
//
// file open/close
// stores information about file (e.g. size) in File struct
//
///////////////////////////////////////////////////////////////////////////////

// interface rationale:
// - this module depends on the handle manager for IO management,
//   but should be useable without the VFS (even if they are designed
//   to work together).
// - allocating a Handle for the file info would solve several problems
//   (see below), but we don't want to allocate 2..3 (VFS, file, Zip file)
//   for every file opened - that'd add up quickly.
//   the Files are always freed at exit though, since they're part of
//   VFile handles in the VFS.
// - we want the VFS open logic to be triggered on file invalidate
//   (if the dev. file is deleted, we should use what's in the archives).
//   we don't want to make this module depend on VFS, so we don't
//   have access to the file location DB; VFS needs to allocate the handle.
// - no problem exposing our internals via File struct -
//   we're only used by the VFS and Zip modules. don't bother making
//   an opaque struct - that'd have to be kept in sync with the real thing.
// - when Zip opens its archives via file_open, a handle isn't needed -
//   the Zip module hides its File struct (required to close the file),
//   and the Handle approach doesn't guard against some idiot calling
//   close(our_fd_value) directly, either.


// per-file state of this (POSIX) implementation. it lives in-place inside
// File::opaque; the functions below cast that storage back to this type.
struct PosixFile
{
    // descriptor returned by open(); file_close sets it to -1 after closing.
    int fd;

    // for reference counted memory-mapping
    // .. address of the current mapping, or 0 if the file is not mapped.
    void* mapping;
    // .. number of references to the mapping (incremented by file_map,
    //    decremented by file_unmap). file_validate requires this to be
    //    nonzero exactly when <mapping> is.
    uint map_refs;
};
// compile-time check: the struct must fit into File::opaque.
// NOTE(review): this uses '<' whereas the PosixDirIterator check uses '<=';
// presumably '<=' would suffice here as well - confirm.
cassert(sizeof(PosixFile) < FILE_OPAQUE_SIZE);

// return the POSIX file descriptor stored in <f>'s opaque PosixFile data.
// (no validation is performed.)
int file_fd_from_PosixFile(File* f)
{
    return ((const PosixFile*)f->opaque)->fd;
}


// sanity-check a File.
// returns INFO::OK if it looks valid; otherwise warns and returns
// ERR::INVALID_PARAM (null pointer), ERR::_1 (negative file descriptor) or
// ERR::_2 (mapping pointer and mapping refcount disagree).
LibError file_validate(const File* f)
{
    if(f == 0)
        WARN_RETURN(ERR::INVALID_PARAM);

    const PosixFile* posix_file = (const PosixFile*)f->opaque;

    if(posix_file->fd < 0)
        WARN_RETURN(ERR::_1);

    // a mapping must exist if and only if it is referenced.
    const bool has_mapping = (posix_file->mapping != 0);
    const bool has_refs    = (posix_file->map_refs != 0);
    if(has_mapping != has_refs)
        WARN_RETURN(ERR::_2);

    // note: don't check atom_fn - that complains at the end of
    // file_open if flags & FILE_DONT_SET_FN and has no benefit, really.

    return INFO::OK;
}


// open the file with portable path <P_fn> and store its state in <f>.
//
// flags: combination of FILE_* flags; values above FILE_FLAG_ALL are
//   rejected with ERR::INVALID_PARAM.
//   - FILE_WRITE: open write-only, creating/truncating the file.
//     otherwise, the file is opened read-only and must be an existing
//     regular file (else ERR::TNODE_NOT_FOUND / ERR::TNODE_WRONG_TYPE).
//   - FILE_DONT_SET_FN: don't store a copy of the filename in f->atom_fn.
//   - FILE_TEXT, FILE_NO_AIO: passed on to wposix (Windows only).
// f: output; zeroed first so that it is in a defined state on failure.
//   on success, f->size is the file size (0 when opened for writing).
//
// returns INFO::OK or a negative error code (ERR::FILE_ACCESS if open()
// itself fails).
LibError file_open(const char* P_fn, uint flags, File* f)
{
    // zero output param in case we fail below.
    memset(f, 0, sizeof(*f));

    if(flags > FILE_FLAG_ALL)
        WARN_RETURN(ERR::INVALID_PARAM);

    char N_fn[PATH_MAX];
    RETURN_ERR(file_make_full_native_path(P_fn, N_fn));

    off_t size = 0;
    int oflag;
    if(flags & FILE_WRITE)
    {
        // don't stat when opening for writing - the file may not exist yet.
        // size remains 0.
        oflag = O_WRONLY|O_CREAT|O_TRUNC;
    }
    // read access requested
    else
    {
        oflag = O_RDONLY;

        struct stat s;
        if(stat(N_fn, &s) < 0)
            WARN_RETURN(ERR::TNODE_NOT_FOUND);

        // make sure <N_fn> is a regular file
        if(!S_ISREG(s.st_mode))
            WARN_RETURN(ERR::TNODE_WRONG_TYPE);

        size = s.st_size;

        // note: despite increased overhead, the AIO read method is still
        // significantly faster, even with small files. we therefore don't
        // automatically set FILE_NO_AIO for them here (e.g. for
        // size <= 32*KiB, which is what one SCSI request can read).
        // (flags are stored below and will influence file_io.)
    }

#if OS_WIN
    oflag |= (flags & FILE_TEXT)? O_TEXT_NP : O_BINARY_NP;

    // AIO was disabled at the user's behest - let wposix know.
    if(flags & FILE_NO_AIO)
        oflag |= O_NO_AIO_NP;
#endif

    const int fd = open(N_fn, oflag, S_IRWXO|S_IRWXU|S_IRWXG);
    if(fd < 0)
        WARN_RETURN(ERR::FILE_ACCESS);

    // success - fill in our private state ..
    PosixFile* posix_file = (PosixFile*)f->opaque;
    posix_file->fd       = fd;
    posix_file->mapping  = 0;
    posix_file->map_refs = 0;

    // .. and the public fields.
    f->flags = flags;
    f->size  = size;
    // see FILE_DONT_SET_FN decl.
    if(!(flags & FILE_DONT_SET_FN))
        f->atom_fn = file_make_unique_fn_copy(P_fn);

    CHECK_FILE(f);

    return INFO::OK;
}


// close a file previously opened via file_open.
// any remaining mapping is removed, f->size is updated to the final file
// size, the descriptor is closed and - if the file was opened for
// writing - cached blocks for it are invalidated.
// returns INFO::OK (or bails via CHECK_FILE if <f> is invalid).
LibError file_close(File* f)
{
    CHECK_FILE(f);
    PosixFile* posix_file = (PosixFile*)f->opaque;

    // get rid of the mapping no matter how many references remain:
    // collapse them into one so that a single unmap call suffices.
    if(posix_file->map_refs > 1)
        posix_file->map_refs = 1;
    // (skip if not mapped - file_unmap would complain)
    if(posix_file->mapping != 0)
        file_unmap(f);

    // report the final file size, which the VFS requires after writing
    // files. querying it here is much easier than keeping track while
    // writing, which would need accounting code in both the sync and
    // async paths.
    f->size = lseek(posix_file->fd, 0, SEEK_END);

    // (check fd to avoid BoundsChecker warning about invalid close() param)
    if(posix_file->fd != -1)
    {
        close(posix_file->fd);
        posix_file->fd = -1;
    }

    // the file cache may hold contents predating the write (rare);
    // wipe them out.
    const bool was_opened_for_writing = (f->flags & FILE_WRITE) != 0;
    if(was_opened_for_writing)
        file_cache_invalidate(f->atom_fn);

    return INFO::OK;
}


///////////////////////////////////////////////////////////////////////////////
//
// memory mapping
//
///////////////////////////////////////////////////////////////////////////////


// upper bound on the per-file mapping reference count: file_map fails with
// ERR::LIMIT once it has been reached.
// the value has no significance aside from preventing uint overflow.
static const uint MAX_MAP_REFS = 255;


// map the entire file <f> into memory. if already currently mapped,
// return the previous mapping (reference-counted).
//
// f: file opened via file_open.
// p, size: receive address and size of the mapping. they are zeroed
//   on failure.
//
// returns INFO::OK on success. otherwise:
// - ERR::LIMIT if there are already MAX_MAP_REFS references;
// - ERR::FAIL (without warning) if the file is empty;
// - the translated errno if mmap fails. the file then remains in the
//   "not mapped" state, i.e. is still valid and can be closed normally.
//
// the mapping will be removed (if still open) when its file is closed.
// however, map/unmap calls should still be paired so that the mapping
// may be removed when no longer needed.
//
// rationale: reference counting is required for zip_map: several
// Zip "mappings" each reference one ZArchive's actual file mapping.
// implement it here so that we also get refcounting for normal files.
LibError file_map(File* f, void*& p, size_t& size)
{
    p = 0;
    size = 0;

    CHECK_FILE(f);
    PosixFile* pf = (PosixFile*)f->opaque;

    // already mapped - increase refcount and return previous mapping.
    if(pf->mapping)
    {
        // prevent overflow; if we have this many refs, should find out why.
        if(pf->map_refs >= MAX_MAP_REFS)
            WARN_RETURN(ERR::LIMIT);
        pf->map_refs++;
    }
    // not yet mapped - do so now.
    else
    {
        // don't allow mapping zero-length files (doesn't make sense,
        // and BoundsChecker warns about wposix mmap failing).
        // then again, don't complain, because this might happen when
        // mounting a dir containing empty files; each is opened as a
        // Zip file.
        if(f->size == 0)
            return ERR::FAIL;   // NOWARN

        // NOTE(review): files opened with FILE_WRITE are O_WRONLY, and
        // POSIX mmap requires read permission on the descriptor - so
        // mapping them presumably always fails; confirm.
        const int prot = (f->flags & FILE_WRITE)? PROT_WRITE : PROT_READ;

        // only store the result once we know it's valid. MAP_FAILED is
        // nonzero, so writing it to pf->mapping would make the file look
        // mapped with 0 references, causing all subsequent file_validate
        // checks (including the one in file_close) to fail.
        errno = 0;
        void* const mapping = mmap(0, f->size, prot, MAP_PRIVATE, pf->fd, (off_t)0);
        if(mapping == MAP_FAILED)
            return LibError_from_errno();

        pf->mapping  = mapping;
        pf->map_refs = 1;
    }

    p = pf->mapping;
    size = f->size;
    return INFO::OK;
}


// release one reference to the mapping of file <f>.
// warns and returns ERR::FILE_NOT_MAPPED if there are no references;
// the mapping itself is only removed once the last reference is gone,
// in which case the translated result of munmap is returned.
//
// note: file_close removes a mapping that is still in place, but
// map/unmap calls should nevertheless be paired so that the mapping can
// go away as soon as it is no longer needed.
LibError file_unmap(File* f)
{
    CHECK_FILE(f);
    PosixFile* posix_file = (PosixFile*)f->opaque;

    // file is not currently mapped
    if(posix_file->map_refs == 0)
        WARN_RETURN(ERR::FILE_NOT_MAPPED);

    // drop our reference; if others remain, the mapping has to stay.
    posix_file->map_refs--;
    if(posix_file->map_refs != 0)
        return INFO::OK;

    // that was the last one: mark the file as unmapped ..
    // (f->size is left alone - the file is still open.)
    void* const mapped_base = posix_file->mapping;
    posix_file->mapping = 0;

    // .. and remove the mapping.
    errno = 0;
    const int munmap_ret = munmap(mapped_base, f->size);
    return LibError_from_posix(munmap_ret);
}


// initialize the file module: calls the init routines of the path, file
// cache and file IO components (in that order) and latches the sector
// size. always returns INFO::OK.
// NOTE(review): the call order is presumably significant - confirm before
// rearranging.
LibError file_init()
{
    path_init();
    file_cache_init();
    file_io_init();

    // convenience: store the value reported by sys_max_sector_size in
    // file_sector_size.
    file_sector_size = sys_max_sector_size();

    return INFO::OK;
}

// shut down the file module: dumps file statistics, then calls the
// shutdown routines of the path and file IO components.
// always returns INFO::OK.
// NOTE(review): there is no visible counterpart to the file_cache_init
// call made by file_init - presumably the cache is torn down elsewhere;
// confirm.
LibError file_shutdown()
{
    file_stats_dump();
    path_shutdown();
    file_io_shutdown();
    return INFO::OK;
}