#include "ModelSync.h" #include "../../nerv/lib/matrix/cuda_helper.h" #include "../../nerv/lib/matrix/generic/elem_type.h" #include "common.h" #include ModelSync* ModelSync_new(void) { ModelSync *self = (ModelSync*)malloc(sizeof(ModelSync)); if (NULL != self) { self->model_mutex = THMutex_new(); self->state_mutex = THMutex_new(); self->initialized_ = false; self->dim_ = 0; self->pos_ = 0; self->data_ = NULL; self->free_data_ = NULL; self->data_ = NULL; self->refcount = 1; self->threadcount = 0; } return self; } ModelSync* ModelSync_newWithId(long id) { ModelSync *self = (ModelSync*)id; __sync_fetch_and_add(&self->refcount, 1); return self; } long ModelSync_id(ModelSync *self) { return (long)(self); } int ModelSync_lockmodel(ModelSync *self) { if(THMutex_lock(self->model_mutex)) return 1; return 0; } int ModelSync_unlockmodel(ModelSync *self) { if(THMutex_unlock(self->model_mutex)) return 1; return 0; } int ModelSync_lockstate(ModelSync *self) { if(THMutex_lock(self->state_mutex)) return 1; return 0; } int ModelSync_unlockstate(ModelSync *self) { if(THMutex_unlock(self->state_mutex)) return 1; return 0; } int ModelSync_free(ModelSync *self) { if (NULL != self && __sync_fetch_and_add(&self->refcount, -1) == 1) { free(self->model_mutex); free(self->state_mutex); Status status; CUDA_SAFE_SYNC_CALL(cudaFreeHost(self->free_data_), &status); free(self); } } int ModelSync_initBuffer(ModelSync *self) { if (NULL != self) { void *free_data = NULL, *data = NULL; size_t size = self->dim_ * sizeof(float)+16; Status status; CUDA_SAFE_SYNC_CALL(cudaHostAlloc((void**) &free_data, size, cudaHostAllocPortable), &status); NERV_SET_STATUS(&status, NERV_NORMAL, 0); data = (free_data ? (void *)( (((unsigned long)*(&free_data)) + 15) & ~0xFUL ) : NULL) ; if (NULL != data) { self->data_ = (float*)(data); self->free_data_ = (float*)(free_data); } return 0; } return 1; } int ModelSync_weightfromd(ModelSync *self, Matrix *dm) { if (NULL != self && NULL != dm) { void *host_data_ = (void*)self->data_; size_t width = dm->ncol * sizeof(float); size_t src_pitch = dm->stride; size_t dst_pitch = src_pitch; Status status; CUDA_SAFE_SYNC_CALL(cudaMemcpy2D(host_data_+self->pos_, dst_pitch, dm->data.f, src_pitch, width, dm->nrow, cudaMemcpyDeviceToHost), &status); NERV_SET_STATUS(&status, NERV_NORMAL, 0); self->pos_ += dm->nrow * dm->stride; return 0; } return 1; } int ModelSync_weighttod(ModelSync *self, Matrix *dm) { if (NULL != self && NULL != dm) { void *host_data_ = (void*)self->data_; size_t width = dm->ncol * sizeof(float); size_t dst_pitch = dm->stride; size_t src_pitch = dst_pitch; Status status; CUDA_SAFE_SYNC_CALL(cudaMemcpy2D(dm->data.f, dst_pitch, host_data_+self->pos_, src_pitch, width, dm->nrow, cudaMemcpyHostToDevice), &status); NERV_SET_STATUS(&status, NERV_NORMAL, 0); self->pos_ += dm->nrow * dm->stride; self->initialized_ = true; return 0; } return 1; } void ModelSync_syncinc(ModelSync *self) { __sync_fetch_and_add(&self->threadcount, 1); } void ModelSync_syncdec(ModelSync *self) { __sync_fetch_and_add(&self->threadcount, -1); } int ModelSync_threadcount(ModelSync *self) { return self->threadcount; } ///////////////////////////////// Xent* Xent_new() { Xent *xent = (Xent*)malloc(sizeof(Xent)); memset(xent, 0, sizeof(Xent)); xent->refcount = 1; return xent; } Xent* Xent_newWithId(long id) { Xent *xent = (Xent*)id; __sync_fetch_and_add(&xent->refcount, 1); return xent; } Xent* Xent_newWithParm(size_t frames_, size_t correct_, double loss_, double entropy_) { Xent *xent = (Xent*)malloc(sizeof(Xent)); 
    xent->frames_ = frames_;
    xent->correct_ = correct_;
    xent->loss_ = loss_;
    xent->entropy_ = entropy_;
    xent->refcount = 1;
    return xent;
}

long Xent_id(Xent *xent)
{
    return (long)(xent);
}

Xent* Xent_add(Xent *a, Xent *b)
{
    a->frames_ += b->frames_;
    a->correct_ += b->correct_;
    a->loss_ += b->loss_;
    a->entropy_ += b->entropy_;
    return a;
}

void Xent_free(Xent *xent)
{
    if (NULL != xent && __sync_fetch_and_add(&xent->refcount, -1) == 1) {
        free(xent);
        xent = NULL;
    }
}

//////////////////////////////////

Mse* Mse_new()
{
    Mse *mse = (Mse*)malloc(sizeof(Mse));
    memset(mse, 0, sizeof(Mse));
    mse->refcount = 1;
    return mse;
}

Mse* Mse_newWithId(long id)
{
    Mse *mse = (Mse*)id;
    __sync_fetch_and_add(&mse->refcount, 1);
    return mse;
}

Mse* Mse_newWithParm(size_t frames_, double loss_)
{
    Mse *mse = (Mse*)malloc(sizeof(Mse));
    mse->frames_ = frames_;
    mse->loss_ = loss_;
    mse->refcount = 1;
    return mse;
}

long Mse_id(Mse *mse)
{
    return (long)(mse);
}

Mse* Mse_add(Mse *a, Mse *b)
{
    a->frames_ += b->frames_;
    a->loss_ += b->loss_;
    return a;
}

void Mse_free(Mse *mse)
{
    if (NULL != mse && __sync_fetch_and_add(&mse->refcount, -1) == 1) {
        free(mse);
        mse = NULL;
    }
}

//////////////////////////////////

GlobalOption* GlobalOption_new()
{
    GlobalOption *option = (GlobalOption*)malloc(sizeof(GlobalOption));
    option->refcount = 1;
    return option;
}

GlobalOption* GlobalOption_newWithParm(int batch_size, float lrate, bool bp,
                                       const char *tr_scp, const char *cv_scp,
                                       const char *transf, const char *network)
{
    GlobalOption *option = (GlobalOption*)malloc(sizeof(GlobalOption));
    option->batch_size = batch_size;
    option->lrate = lrate;
    option->bp = bp;
    /* the destination fields are assumed to be large enough to hold the
       full strings, including the terminating NUL */
    strncpy(option->tr_scp, tr_scp, strlen(tr_scp) + 1);
    strncpy(option->cv_scp, cv_scp, strlen(cv_scp) + 1);
    strncpy(option->transf, transf, strlen(transf) + 1);
    strncpy(option->network, network, strlen(network) + 1);
    option->refcount = 1;
    return option;
}

GlobalOption* GlobalOption_newWithId(long id)
{
    GlobalOption *option = (GlobalOption*)id;
    __sync_fetch_and_add(&option->refcount, 1);
    return option;
}

long GlobalOption_id(GlobalOption *option)
{
    return (long)(option);
}

void GlobalOption_free(GlobalOption *option)
{
    if (NULL != option && __sync_fetch_and_add(&option->refcount, -1) == 1) {
        free(option);
        option = NULL;
    }
}
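
/*
 * Usage sketch (illustrative only, not part of the original API surface,
 * compiled only when MODELSYNC_USAGE_EXAMPLE is defined): a hypothetical
 * worker thread that shares one ModelSync instance by id. It bumps the
 * refcount on entry, registers itself via syncinc, serializes access to
 * the shared weight buffer with the model lock, and drops its reference
 * on exit. The actual weight transfers (ModelSync_weightfromd /
 * ModelSync_weighttod against the thread's device matrices) are elided.
 */
#ifdef MODELSYNC_USAGE_EXAMPLE
static void *example_worker(void *arg)
{
    /* take a per-thread reference to the shared synchronizer */
    ModelSync *sync = ModelSync_newWithId((long)arg);
    ModelSync_syncinc(sync);

    ModelSync_lockmodel(sync);
    /* ... pull weights to the device, train a mini-batch, push them back ... */
    ModelSync_unlockmodel(sync);

    ModelSync_syncdec(sync);
    ModelSync_free(sync);   /* drop this thread's reference */
    return NULL;
}
#endif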