diff options
author | Determinant <[email protected]> | 2015-06-22 19:01:29 +0800 |
---|---|---|
committer | Determinant <[email protected]> | 2015-06-22 19:01:29 +0800 |
commit | 2497fd9e7a0fae5ee4887890d7a312e0e08a93b8 (patch) | |
tree | 382f97575bd2df9ee6abb1662b11b279fc22d72b /io | |
parent | 196e9b48a3541caccdffc5743001cced70667091 (diff) |
major change: use luarocks to manage project
Diffstat (limited to 'io')
-rw-r--r-- | io/chunk_file.c | 325 | ||||
-rw-r--r-- | io/chunk_file.h | 23 | ||||
-rw-r--r-- | io/init.c | 6 | ||||
-rw-r--r-- | io/init.lua | 55 | ||||
-rw-r--r-- | io/sgd_buffer.lua | 111 |
5 files changed, 0 insertions, 520 deletions
diff --git a/io/chunk_file.c b/io/chunk_file.c deleted file mode 100644 index c0b6b9f..0000000 --- a/io/chunk_file.c +++ /dev/null @@ -1,325 +0,0 @@ -#include <stdio.h> -#include <ctype.h> -#include <string.h> -#include "../common.h" -#include "chunk_file.h" - -#define INVALID_FORMAT_ERROR(fn) \ - nerv_error(L, "Invalid chunk file: %s", fn) -#define CHECK_FORMAT(exp, ret, fname) \ - do { \ - if ((exp) != (ret)) INVALID_FORMAT_ERROR(fn); \ - } while (0) -#define CHECK_FILE_OPEN(pfh) \ - do { \ - if ((pfh)->closed) \ - nerv_error(L, "operations on a closed file"); \ - } while (0) - -const char *nerv_chunk_file_tname = "nerv.ChunkFile"; -const char *nerv_chunk_file_handle_tname = "nerv.ChunkFileHandle"; -const char *nerv_chunk_info_tname = "nerv.ChunkInfo"; -const char *nerv_chunk_data_tname = "nerv.ChunkData"; - -#define PARAM_HEADER_SIZE 16 - -enum { - NORMAL, - INVALID_FORMAT, - END_OF_FILE, - SECTION_OVERFLOW, - WRITE_ERROR -}; - -size_t read_chunk_header_plain(FILE *fp, int *status) { - static char buff[PARAM_HEADER_SIZE]; - int i; - size_t size = 0; - *status = NORMAL; - if (fread(buff, 1, PARAM_HEADER_SIZE, fp) != PARAM_HEADER_SIZE) - { - if (feof(fp)) *status = END_OF_FILE; - else *status = INVALID_FORMAT; - } - for (i = 0; i < PARAM_HEADER_SIZE; i++) - if (isdigit(buff[i])) - size = size * 10 + buff[i] - '0'; -/* fprintf(stderr, "header: %lu\n", size); */ - return size; -} - -#define CHECK_WRITE(status) \ - do { \ - if (status == SECTION_OVERFLOW) \ - nerv_error(L, "section overflowed"); \ - else if (status == WRITE_ERROR) \ - nerv_error(L, "error while writing"); \ - } while (0) - -void write_chunk_header_plain(FILE *fp, size_t size, int *status) { - static char buff[PARAM_HEADER_SIZE]; - int i; - *status = NORMAL; - for (i = PARAM_HEADER_SIZE - 3; i > 0; i--, size /= 10) - buff[i] = size % 10 + '0'; - if (size) - { - *status = SECTION_OVERFLOW; - return; - } - buff[0] = '['; - buff[PARAM_HEADER_SIZE - 2] = ']'; - buff[PARAM_HEADER_SIZE - 1] = '\n'; - if (fwrite(buff, 1, PARAM_HEADER_SIZE, fp) != PARAM_HEADER_SIZE) - { - *status = WRITE_ERROR; - return; - } -} - -ChunkData *get_chunk_data(FILE *fp, ChunkInfo *info) { - ChunkData *pcd = (ChunkData *)malloc(sizeof(ChunkData)); - pcd->data = (char *)malloc(info->length); - pcd->fp = fmemopen(pcd->data, info->length, "r"); - assert(fseeko(fp, info->offset, SEEK_SET) == 0); - if (fread(pcd->data, 1, info->length, fp) != (size_t)info->length) - return NULL; - return pcd; -} - -const char *read_chunk_metadata(lua_State *L, FILE *fp, const char *fn) { -#define LINEBUFF_SIZE 1024 - static char buff[7 + LINEBUFF_SIZE] = "return "; - CHECK_FORMAT(fgets(buff + 7, LINEBUFF_SIZE, fp), buff + 7, fn); - /* fprintf(stderr, "metadata: %s\n", buff); */ - return buff; -} - -void write_chunk_metadata(FILE *fp, const char *metadata_str, int *status) { - size_t size = strlen(metadata_str); - *status = NORMAL; - if (fwrite(metadata_str, 1, size, fp) != size || - fprintf(fp, "\n") < 0) - { - *status = WRITE_ERROR; - return; - } - /* fprintf(stderr, "metadata: %s\n", metadata_str); */ -} - - -int nerv_chunk_file_open_write(lua_State *L, const char *fn) { - FILE *fp = fopen(fn, "w"); - ChunkFileHandle *lfp; - if (!fp) nerv_error(L, "Error while opening chunk file: %s", fn); - lfp = (ChunkFileHandle *)malloc(sizeof(ChunkFileHandle)); - lfp->fp = fp; - lfp->closed = 0; - luaT_pushudata(L, lfp, nerv_chunk_file_handle_tname); - lua_setfield(L, -2, "handle"); - luaT_pushmetatable(L, nerv_chunk_file_tname); - lua_setmetatable(L, -2); - return 1; -} - -int nerv_chunk_file_open_read(lua_State *L, const char *fn) { - FILE *fp = fopen(fn, "r"); - int i, status; - size_t chunk_len; - off_t offset; - ChunkFileHandle *lfp; - - if (!fp) nerv_error(L, "Error while opening chunk file: %s", fn); - offset = ftello(fp); - lua_newtable(L); - /* fprintf(stderr, "%d\n", (int)offset); */ - for (i = 0;; offset += chunk_len, i++) - { - ChunkInfo *pci; - /* fprintf(stderr, "reading chunk %d from %d\n", i, (int)offset); */ - /* skip to the begining of chunk i */ - CHECK_FORMAT(fseeko(fp, offset, SEEK_SET), 0, fn); - /* read header */ - chunk_len = read_chunk_header_plain(fp, &status); - if (status == END_OF_FILE) break; - else if (status == INVALID_FORMAT) - INVALID_FORMAT_ERROR(fn); - /* read metadata */ - luaL_loadstring(L, read_chunk_metadata(L, fp, fn)); - CHECK_FORMAT(lua_pcall(L, 0, 1, 0), 0, fn); - CHECK_FORMAT(lua_istable(L, -1), 1, fn); - /* stack: obj_table, metadata */ - /* chunk info */ - pci = (ChunkInfo *)malloc(sizeof(ChunkInfo)); - pci->offset = ftello(fp); - pci->length = chunk_len - (pci->offset - offset); - /* fprintf(stderr, "%d + %d (skip %lu)\n", (int)pci->offset, - (int)pci->length, chunk_len); */ - luaT_pushudata(L, pci, nerv_chunk_info_tname); - lua_setfield(L, -2, "chunk"); - /* stack: obj_table, metadata */ - /* get id */ - lua_getfield(L, -1, "id"); - /* stack: obj_table, metadata, id */ - if (!lua_isstring(L, -1)) - nerv_error(L, "id field in metadata must be a string"); - lua_pushvalue(L, -1); - /* stack: obj_table, metadata, id, id */ - lua_gettable(L, -4); - /* stack: obj_table, metadata, id, obj[id] */ - if (!lua_isnil(L, -1)) - nerv_error(L, "conflicting id"); - lua_pop(L, 1); - /* stack: obj_table, metadata, id */ - lua_pushvalue(L, -2); - /* stack: obj_table, metadata, id, metadata */ - lua_settable(L, -4); - /* stack: obj_table, metadata */ - lua_pop(L, 1); - } - lua_setfield(L, -2, "metadata"); - lfp = (ChunkFileHandle *)malloc(sizeof(ChunkFileHandle)); - lfp->fp = fp; - lfp->closed = 0; - luaT_pushudata(L, lfp, nerv_chunk_file_handle_tname); - lua_setfield(L, -2, "handle"); - luaT_pushmetatable(L, nerv_chunk_file_tname); - lua_setmetatable(L, -2); - return 1; -} - -int nerv_chunk_file_new_(lua_State *L, const char *fn, const char *mode) { - int rd = 1, bin = 0; - size_t i, len = strlen(mode); - for (i = 0; i < len; i++) - switch (mode[i]) - { - case 'r': rd = 1; break; - case 'w': rd = 0; break; - case 'b': bin = 1; break; - } - return rd ? nerv_chunk_file_open_read(L, fn) : \ - nerv_chunk_file_open_write(L, fn); -} - -int nerv_chunk_file___init(lua_State *L) { - lua_pushvalue(L, 1); - return nerv_chunk_file_new_(L, luaL_checkstring(L, 2), - luaL_checkstring(L, 3)); -} - -int nerv_chunk_file_new(lua_State *L) { - lua_newtable(L); - return nerv_chunk_file_new_(L, luaL_checkstring(L, 1), - luaL_checkstring(L, 2)); -} - -int nerv_chunk_file_write_chunkdata(lua_State *L) { - ChunkFileHandle *pfh; - int status; - off_t start; - size_t size; - const char *metadata_str = lua_tolstring(L, 2, NULL); - lua_getfield(L, 1, "handle"); - pfh = luaT_checkudata(L, -1, nerv_chunk_file_handle_tname); - CHECK_FILE_OPEN(pfh); - start = ftello(pfh->fp); - write_chunk_header_plain(pfh->fp, 0, &status); /* fill zeros */ - CHECK_WRITE(status); - write_chunk_metadata(pfh->fp, metadata_str, &status); - CHECK_WRITE(status); - lua_pushvalue(L, 3); - lua_getfield(L, -1, "write"); - if (!lua_isfunction(L, -1)) - nerv_error(L, "\"write\" method must be implemented"); - lua_pushvalue(L, -2); - lua_pushvalue(L, 4); /* pass handle as parameter to write() */ - lua_call(L, 2, 0); /* let the write() to write */ - lua_pop(L, 1); - size = ftello(pfh->fp) - start; - fseeko(pfh->fp, start, SEEK_SET); - /* write the calced size */ - write_chunk_header_plain(pfh->fp, size, &status); - CHECK_WRITE(status); - fseeko(pfh->fp, 0, SEEK_END); - return 0; -} - -int nerv_chunk_file_get_chunkdata(lua_State *L) { - ChunkFileHandle *pfh; - ChunkInfo *pci; - ChunkData *pcd; - const char *id = luaL_checkstring(L, 2); - - lua_getfield(L, 1, "handle"); - pfh = luaT_checkudata(L, -1, nerv_chunk_file_handle_tname); - CHECK_FILE_OPEN(pfh); - lua_pop(L, 1); /* pop handle */ - lua_getfield(L, 1, "metadata"); - /* now stack: self, k, metadata */ - lua_getfield(L, -1, id); - /* now stack: self, k, metadata, kth{} */ - if (lua_isnil(L, -1)) /* no chunck with the id */ - return 0; - lua_getfield(L, -1, "chunk"); - pci = luaT_checkudata(L, -1, nerv_chunk_info_tname); - if (!(pcd = get_chunk_data(pfh->fp, pci))) - nerv_error(L, "unexpected end of file"); - luaT_pushudata(L, pcd, nerv_chunk_data_tname); - return 1; -} - -int nerv_chunk_file_close(lua_State *L) { - ChunkFileHandle *pfh; - lua_getfield(L, 1, "handle"); - pfh = luaT_checkudata(L, -1, nerv_chunk_file_handle_tname); - CHECK_FILE_OPEN(pfh); - fclose(pfh->fp); - pfh->closed = 1; - return 0; -} - -int nerv_chunk_file_handle_destroy(lua_State *L) { - ChunkFileHandle *pfh = luaT_checkudata(L, 1, - nerv_chunk_file_handle_tname); - if (!pfh->closed) fclose(pfh->fp); - free(pfh); - return 0; -} - -static int nerv_chunk_info_destroy(lua_State *L) { - ChunkInfo *pci = luaT_checkudata(L, 1, nerv_chunk_info_tname); - free(pci); - return 0; -} - -static int nerv_chunk_data_destroy(lua_State *L) { - ChunkData *pcd = luaT_checkudata(L, 1, nerv_chunk_data_tname); - fclose(pcd->fp); - free(pcd->data); - free(pcd); - return 0; -} - -static const luaL_Reg nerv_chunk_file_methods[] = { - {"get_chunkdata", nerv_chunk_file_get_chunkdata}, - {"_write_chunkdata", nerv_chunk_file_write_chunkdata}, - {"close", nerv_chunk_file_close}, - {"__init", nerv_chunk_file___init}, - {NULL, NULL} -}; - -void nerv_chunk_file_init(lua_State *L) { - luaT_newmetatable(L, nerv_chunk_file_tname, NULL, - nerv_chunk_file_new, - NULL, NULL); - luaL_register(L, NULL, nerv_chunk_file_methods); - lua_pop(L, 1); - luaT_newmetatable(L, nerv_chunk_file_handle_tname, NULL, - NULL, nerv_chunk_file_handle_destroy, NULL); - luaT_newmetatable(L, nerv_chunk_info_tname, NULL, - NULL, nerv_chunk_info_destroy, NULL); - luaT_newmetatable(L, nerv_chunk_data_tname, NULL, - NULL, nerv_chunk_data_destroy, NULL); -} - diff --git a/io/chunk_file.h b/io/chunk_file.h deleted file mode 100644 index 9bae59d..0000000 --- a/io/chunk_file.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef NERV_LAYER_FILE_H -#define NERV_LAYER_FILE_H - -extern const char *nerv_chunk_file_tname; -extern const char *nerv_chunk_file_handle_tname; -extern const char *nerv_chunk_info_tname; -extern const char *nerv_chunk_data_tname; - -typedef struct ChunkFileHandle { - FILE *fp; - int closed; -} ChunkFileHandle; - -typedef struct ChunkInfo { - off_t offset, length; -} ChunkInfo; - -typedef struct ChunkData { - FILE *fp; - char *data; -} ChunkData; - -#endif diff --git a/io/init.c b/io/init.c deleted file mode 100644 index 70585f7..0000000 --- a/io/init.c +++ /dev/null @@ -1,6 +0,0 @@ -#include "../common.h" - -extern void nerv_chunk_file_init(lua_State *L); -void nerv_io_init(lua_State *L) { - nerv_chunk_file_init(L); -} diff --git a/io/init.lua b/io/init.lua deleted file mode 100644 index b722a81..0000000 --- a/io/init.lua +++ /dev/null @@ -1,55 +0,0 @@ -function nerv.ChunkFile:write_chunkdata(metadata, writer) - if type(metadata) ~= "table" then - nerv.error("metadata should be a Lua table") - return - end - return self:_write_chunkdata(table.tostring(metadata), writer) -end - -function nerv.ChunkFile:write_chunk(chunk) - local id = chunk.id - local type = chunk.__typename - if id == nil then - nerv.error("id of chunk %s must be specified", type) - end - self:write_chunkdata({id = id, - type = type, - info = chunk:get_info()}, chunk) -end - -function nerv.ChunkFile:read_chunk(id, global_conf) - if self.metadata == nil then - nerv.error("wrong file opening mode") - end - local metadata = self.metadata[id] - if metadata == nil then - nerv.error("chunk with id %s does not exist", id) - end - local chunk_type = nerv.get_type(metadata.type) - local chunk = chunk_type(id, global_conf) - chunk:set_info(metadata.info) - chunk:read(self:get_chunkdata(id)) - return chunk -end - -local DataReader = nerv.class("nerv.DataReader") - -function DataReader:__init(global_conf, reader_conf) - nerv.error_method_not_implemented() -end - -function DataReader:get_data() - nerv.error_method_not_implemented() -end - -local DataBuffer = nerv.class("nerv.DataBuffer") - -function DataBuffer:__init(global_conf, buffer_conf) - nerv.error_method_not_implemented() -end - -function DataBuffer:get_batch() - nerv.error_method_not_implemented() -end - -require 'io.sgd_buffer' diff --git a/io/sgd_buffer.lua b/io/sgd_buffer.lua deleted file mode 100644 index f4f7dfe..0000000 --- a/io/sgd_buffer.lua +++ /dev/null @@ -1,111 +0,0 @@ -local SGDBuffer = nerv.class("nerv.SGDBuffer", "nerv.DataBuffer") - -function SGDBuffer:__init(global_conf, buffer_conf) - self.gconf = global_conf - self.buffer_size = math.floor(buffer_conf.buffer_size / - global_conf.batch_size) * global_conf.batch_size - self.randomize = buffer_conf.randomize - if self.randomize == nil then - self.randomize = false - end - self.head = 0 - self.tail = 0 - self.readers = {} - for i, reader_spec in ipairs(buffer_conf.readers) do - local buffs = {} - for id, width in pairs(reader_spec.data) do - buffs[id] = {data = global_conf.mmat_type(self.buffer_size, width), - leftover = nil, - width = width} - end - table.insert(self.readers, {buffs = buffs, - reader = reader_spec.reader, - tail = 0, - has_leftover = false}) - end -end - -function SGDBuffer:saturate() - local buffer_size = self.buffer_size - self.head = 0 - self.tail = buffer_size - for i, reader in ipairs(self.readers) do - reader.tail = 0 - if reader.has_leftover then - local lrow - for id, buff in pairs(reader.buffs) do - lrow = buff.leftover:nrow() - if lrow > buffer_size then - nerv.error("buffer size is too small to contain leftovers") - end - buff.data:copy_from(buff.leftover, 0, lrow) - buff.leftover = nil - end - nerv.printf("leftover: %d\n", lrow) - reader.tail = lrow - reader.has_leftover = false - end - while reader.tail < buffer_size do - local data = reader.reader:get_data() - if data == nil then - break - end - local drow = nil - for id, d in pairs(data) do - if drow == nil then - drow = d:nrow() - elseif d:nrow() ~= drow then - nerv.error("reader provides with inconsistent rows of data") - end - end - local remain = buffer_size - reader.tail - if drow > remain then - for id, buff in pairs(reader.buffs) do - local d = data[id] - if d == nil then - nerv.error("reader does not provide data for %s", id) - end - buff.leftover = self.gconf.mmat_type(drow - remain, - buff.width) - buff.leftover:copy_from(d, remain, drow) - end - drow = remain - reader.has_leftover = true - end - for id, buff in pairs(reader.buffs) do - buff.data:copy_from(data[id], 0, drow, reader.tail) - end - reader.tail = reader.tail + drow - end - self.tail = math.min(self.tail, reader.tail) - end - self.rand_map = nerv.MMatrixInt.perm_gen(self.tail) -- generate shuffled index - collectgarbage("collect") - return self.tail >= self.gconf.batch_size -end - -function SGDBuffer:get_data() - local batch_size = self.gconf.batch_size - if self.head >= self.tail then -- buffer is empty - if not self:saturate() then - return nil -- the remaining data cannot build a batch - end - end - if self.head + batch_size > self.tail then - return nil -- the remaining data cannot build a batch - end - local res = {} - for i, reader in ipairs(self.readers) do - for id, buff in pairs(reader.buffs) do - local batch = self.gconf.cumat_type(batch_size, buff.width) - if self.randomize then - batch:copy_rows_fromh_by_idx(buff.data, self.rand_map, self.head) - else - batch:copy_fromh(buff.data, self.head, self.head + batch_size) - end - res[id] = batch - end - end - self.head = self.head + batch_size - return res -end |