#include "ModelSync.h"
#include "../../nerv/lib/matrix/cuda_helper.h"
#include "../../nerv/lib/matrix/generic/elem_type.h"
#include "common.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
/*
 * Allocate a ModelSync object and put it into its initial state.
 *
 * Both mutexes are created with THMutex_new(), the buffer pointers are
 * cleared, dim_/pos_/threadcount start at zero, initialized_ is false and
 * the reference count starts at 1.
 *
 * Returns the new object, or NULL when malloc() fails.
 */
ModelSync* ModelSync_new(void)
{
    ModelSync *sync = (ModelSync*)malloc(sizeof(ModelSync));

    if (NULL == sync)
        return NULL;

    sync->model_mutex = THMutex_new();
    sync->state_mutex = THMutex_new();

    /* No host buffer exists until ModelSync_initBuffer() is called. */
    sync->data_ = NULL;
    sync->free_data_ = NULL;
    sync->dim_ = 0;
    sync->pos_ = 0;
    sync->initialized_ = false;

    sync->threadcount = 0;
    sync->refcount = 1;
    return sync;
}
/*
 * Reinterpret `id` (an address cast to long, such as the value returned by
 * ModelSync_id()) as a ModelSync pointer and atomically take one more
 * reference on it. `id` is not validated.
 */
ModelSync* ModelSync_newWithId(long id)
{
    ModelSync *shared = (ModelSync*)id;

    __sync_add_and_fetch(&shared->refcount, 1);
    return shared;
}
/* Return the address of `self` converted to long, usable as an opaque handle. */
long ModelSync_id(ModelSync *self)
{
    long handle = (long)self;

    return handle;
}
/*
 * Lock the model mutex of `self`.
 * Returns 1 when THMutex_lock() reports a non-zero result, 0 otherwise.
 */
int ModelSync_lockmodel(ModelSync *self)
{
    return THMutex_lock(self->model_mutex) ? 1 : 0;
}
/*
 * Unlock the model mutex of `self`.
 * Returns 1 when THMutex_unlock() reports a non-zero result, 0 otherwise.
 */
int ModelSync_unlockmodel(ModelSync *self)
{
    return THMutex_unlock(self->model_mutex) ? 1 : 0;
}
/*
 * Lock the state mutex of `self`.
 * Returns 1 when THMutex_lock() reports a non-zero result, 0 otherwise.
 */
int ModelSync_lockstate(ModelSync *self)
{
    return THMutex_lock(self->state_mutex) ? 1 : 0;
}
/*
 * Unlock the state mutex of `self`.
 * Returns 1 when THMutex_unlock() reports a non-zero result, 0 otherwise.
 */
int ModelSync_unlockstate(ModelSync *self)
{
    return THMutex_unlock(self->state_mutex) ? 1 : 0;
}
/*
 * Drop one reference to `self`. The caller that drops the last reference
 * tears the object down: both mutexes, the pinned host buffer, then the
 * struct itself.
 *
 * Returns 0 when `self` is non-NULL and 1 when it is NULL, which is the
 * convention the other int-returning ModelSync_* functions use. The previous
 * version fell off the end of an int function without returning a value.
 */
int ModelSync_free(ModelSync *self)
{
    if (NULL == self)
        return 1;

    /* __sync_fetch_and_add yields the value *before* the decrement, so a
     * result of 1 means this call released the final reference. */
    if (__sync_fetch_and_add(&self->refcount, -1) == 1)
    {
        Status status;

        /* Mutexes obtained from THMutex_new() are released through
         * THMutex_free() rather than free(), so the mutex implementation can
         * run its own teardown. The NULL guards cover a failed THMutex_new(). */
        if (NULL != self->model_mutex)
            THMutex_free(self->model_mutex);
        if (NULL != self->state_mutex)
            THMutex_free(self->state_mutex);

        /* free_data_ is the pointer originally returned by cudaHostAlloc()
         * (data_ is only an aligned view into it); it is NULL when
         * ModelSync_initBuffer() was never called. */
        CUDA_SAFE_SYNC_CALL(cudaFreeHost(self->free_data_), &status);
        free(self);
    }
    return 0;
}
/*
 * Allocate the pinned (page-locked, portable) host staging buffer of `self`.
 *
 * The request is dim_ * sizeof(float) bytes plus 16 bytes of slack, so that
 * data_ can be rounded up to a 16-byte boundary inside the allocation.
 * free_data_ keeps the pointer returned by cudaHostAlloc(), which is the one
 * that has to be handed back to cudaFreeHost().
 *
 * Returns 0 on success. Returns 1 when `self` is NULL or when no buffer was
 * obtained; the previous version reported success in the latter case.
 *
 * NOTE(review): a buffer installed by an earlier call is overwritten without
 * being released - confirm this function is only called once per object.
 */
int ModelSync_initBuffer(ModelSync *self)
{
    void *raw = NULL;
    size_t size;
    Status status;

    if (NULL == self)
        return 1;

    size = self->dim_ * sizeof(float) + 16;
    CUDA_SAFE_SYNC_CALL(cudaHostAlloc((void**) &raw, size, cudaHostAllocPortable), &status);
    NERV_SET_STATUS(&status, NERV_NORMAL, 0);

    if (NULL == raw)
        return 1;

    self->free_data_ = (float*)raw;
    /* Round up to the next multiple of 16. uintptr_t is used because
     * unsigned long is narrower than a pointer on LLP64 platforms. */
    self->data_ = (float*)(((uintptr_t)raw + 15) & ~(uintptr_t)0xF);
    return 0;
}
/*
 * Copy the device matrix `dm` into the host staging buffer of `self` at the
 * current write position, then advance the position.
 *
 * The copy is a cudaMemcpy2D of dm->nrow rows, each dm->ncol * sizeof(float)
 * bytes wide, with dm->stride used as the row pitch on both sides. pos_ is
 * applied as a byte offset and grows by dm->nrow * dm->stride.
 *
 * Returns 0 after the copy has been issued. Returns 1 when `self` or `dm` is
 * NULL, or when the staging buffer has not been allocated yet.
 *
 * NOTE(review): nothing here checks pos_ against the buffer size - confirm
 * callers size dim_ to cover every matrix that is staged.
 */
int ModelSync_weightfromd(ModelSync *self, Matrix *dm)
{
    char *host_base;
    size_t width;
    size_t pitch;
    Status status;

    if (NULL == self || NULL == dm || NULL == self->data_)
        return 1;

    /* char* keeps the offset arithmetic standard C; the earlier void*
     * arithmetic only worked as a compiler extension. */
    host_base = (char*)self->data_;
    width = dm->ncol * sizeof(float);
    pitch = dm->stride;

    CUDA_SAFE_SYNC_CALL(cudaMemcpy2D(host_base + self->pos_, pitch, dm->data.f, pitch, width, dm->nrow, cudaMemcpyDeviceToHost), &status);
    NERV_SET_STATUS(&status, NERV_NORMAL, 0);
    self->pos_ += dm->nrow * dm->stride;
    return 0;
}
/*
 * Copy the next matrix from the host staging buffer of `self` into the
 * device matrix `dm`, then advance the read position and mark the object as
 * initialized.
 *
 * The copy is a cudaMemcpy2D of dm->nrow rows, each dm->ncol * sizeof(float)
 * bytes wide, with dm->stride used as the row pitch on both sides. pos_ is
 * applied as a byte offset and grows by dm->nrow * dm->stride.
 *
 * Returns 0 after the copy has been issued. Returns 1 when `self` or `dm` is
 * NULL, or when the staging buffer has not been allocated yet.
 *
 * NOTE(review): nothing here checks pos_ against the buffer size - confirm
 * callers size dim_ to cover every matrix that is staged.
 */
int ModelSync_weighttod(ModelSync *self, Matrix *dm)
{
    const char *host_base;
    size_t width;
    size_t pitch;
    Status status;

    if (NULL == self || NULL == dm || NULL == self->data_)
        return 1;

    /* char* keeps the offset arithmetic standard C; the earlier void*
     * arithmetic only worked as a compiler extension. */
    host_base = (const char*)self->data_;
    width = dm->ncol * sizeof(float);
    pitch = dm->stride;

    CUDA_SAFE_SYNC_CALL(cudaMemcpy2D(dm->data.f, pitch, host_base + self->pos_, pitch, width, dm->nrow, cudaMemcpyHostToDevice), &status);
    NERV_SET_STATUS(&status, NERV_NORMAL, 0);
    self->pos_ += dm->nrow * dm->stride;
    self->initialized_ = true;
    return 0;
}
/* Atomically add one to the thread counter of `self`. */
void ModelSync_syncinc(ModelSync *self)
{
    __sync_add_and_fetch(&self->threadcount, 1);
}
/* Atomically subtract one from the thread counter of `self`. */
void ModelSync_syncdec(ModelSync *self)
{
    __sync_sub_and_fetch(&self->threadcount, 1);
}
/* Return the current value of the thread counter (a plain, non-atomic read). */
int ModelSync_threadcount(ModelSync *self)
{
    int count = self->threadcount;

    return count;
}
/////////////////////////////////
/*
 * Allocate a zero-filled Xent with a reference count of 1.
 *
 * Returns the new object, or NULL when the allocation fails. The previous
 * version passed an unchecked malloc() result straight to memset().
 */
Xent* Xent_new()
{
    /* calloc() gives the same all-zero bytes as malloc() + memset(0). */
    Xent *xent = (Xent*)calloc(1, sizeof(Xent));

    if (NULL != xent)
        xent->refcount = 1;
    return xent;
}
Xent* Xent_newWithId(long id)
{
Xent *xent = (Xent*)id;
__sync_fetch_and_add(&xent->refcount, 1);
return xent;
}
/*
 * Allocate a Xent holding the given frame count, correct count, loss and
 * entropy, with a reference count of 1.
 *
 * Returns the new object, or NULL when the allocation fails. The previous
 * version wrote through an unchecked malloc() result.
 */
Xent* Xent_newWithParm(size_t frames_, size_t correct_, double loss_, double entropy_)
{
    /* Zero-filled like Xent_new(), so any field not set below is defined. */
    Xent *xent = (Xent*)calloc(1, sizeof(Xent));

    if (NULL == xent)
        return NULL;

    xent->frames_ = frames_;
    xent->correct_ = correct_;
    xent->loss_ = loss_;
    xent->entropy_ = entropy_;
    xent->refcount = 1;
    return xent;
}
/* Return the address of `xent` converted to long, usable as an opaque handle. */
long Xent_id(Xent *xent)
{
    long handle = (long)xent;

    return handle;
}
/*
 * Accumulate the statistics of `b` into `a`, field by field (frames_,
 * correct_, loss_, entropy_). `b` is left unmodified and neither reference
 * count is touched.
 *
 * Returns `a`.
 */
Xent* Xent_add(Xent *a, Xent *b)
{
    Xent *total = a;
    const Xent *delta = b;

    total->entropy_ += delta->entropy_;
    total->loss_ += delta->loss_;
    total->correct_ += delta->correct_;
    total->frames_ += delta->frames_;
    return total;
}
/*
 * Drop one reference to `xent` and release its memory when that was the last
 * one. A NULL argument is ignored.
 */
void Xent_free(Xent *xent)
{
    if (NULL == xent)
        return;

    /* The builtin returns the pre-decrement value: 1 means last reference. */
    if (__sync_fetch_and_add(&xent->refcount, -1) != 1)
        return;

    free(xent);
}
//////////////////////////////////
/*
 * Allocate a zero-filled Mse with a reference count of 1.
 *
 * Returns the new object, or NULL when the allocation fails. The previous
 * version passed an unchecked malloc() result straight to memset().
 */
Mse* Mse_new()
{
    /* calloc() gives the same all-zero bytes as malloc() + memset(0). */
    Mse *mse = (Mse*)calloc(1, sizeof(Mse));

    if (NULL != mse)
        mse->refcount = 1;
    return mse;
}
Mse* Mse_newWithId(long id)
{
Mse *mse = (Mse*)id;
__sync_fetch_and_add(&mse->refcount, 1);
return mse;
}
/*
 * Allocate an Mse holding the given frame count and loss, with a reference
 * count of 1.
 *
 * Returns the new object, or NULL when the allocation fails. The previous
 * version wrote through an unchecked malloc() result.
 */
Mse* Mse_newWithParm(size_t frames_, double loss_)
{
    /* Zero-filled like Mse_new(), so any field not set below is defined. */
    Mse *mse = (Mse*)calloc(1, sizeof(Mse));

    if (NULL == mse)
        return NULL;

    mse->frames_ = frames_;
    mse->loss_ = loss_;
    mse->refcount = 1;
    return mse;
}
/* Return the address of `mse` converted to long, usable as an opaque handle. */
long Mse_id(Mse *mse)
{
    long handle = (long)mse;

    return handle;
}
/*
 * Accumulate the statistics of `b` into `a`, field by field (frames_,
 * loss_). `b` is left unmodified and neither reference count is touched.
 *
 * Returns `a`.
 */
Mse* Mse_add(Mse *a, Mse *b)
{
    Mse *total = a;
    const Mse *delta = b;

    total->loss_ += delta->loss_;
    total->frames_ += delta->frames_;
    return total;
}
/*
 * Drop one reference to `mse` and release its memory when that was the last
 * one. A NULL argument is ignored.
 */
void Mse_free(Mse *mse)
{
    if (NULL == mse)
        return;

    /* The builtin returns the pre-decrement value: 1 means last reference. */
    if (__sync_fetch_and_add(&mse->refcount, -1) != 1)
        return;

    free(mse);
}
//////////////////////////////////
/*
 * Allocate a zero-filled GlobalOption with a reference count of 1.
 *
 * Returns the new object, or NULL when the allocation fails. The previous
 * version dereferenced an unchecked malloc() result and left every field
 * except refcount uninitialised, unlike Xent_new() and Mse_new(), which zero
 * their objects.
 */
GlobalOption* GlobalOption_new()
{
    GlobalOption *option = (GlobalOption*)calloc(1, sizeof(GlobalOption));

    if (NULL != option)
        option->refcount = 1;
    return option;
}
/*
 * Copy `src` into the `cap`-byte buffer `dst`, truncating when it does not
 * fit and always NUL-terminating. A NULL `src` is stored as an empty string.
 */
static void GlobalOption_setString(char *dst, size_t cap, const char *src)
{
    if (0 == cap)
        return;
    if (NULL == src)
        src = "";
    /* strncpy() zero-pads short strings; the terminator is forced for long ones. */
    strncpy(dst, src, cap - 1);
    dst[cap - 1] = '\0';
}

/*
 * Allocate a GlobalOption filled from the given batch size, learning rate,
 * bp flag and the four strings (tr_scp, cv_scp, transf, network), with a
 * reference count of 1.
 *
 * Each string is copied into the matching field and is truncated if it is
 * longer than that field. The previous version bounded strncpy() by the
 * source length, which is no bound at all on the destination, and it
 * dereferenced an unchecked malloc() result.
 *
 * Returns the new object, or NULL when the allocation fails.
 */
GlobalOption* GlobalOption_newWithParm(int batch_size, float lrate, bool bp,const char *tr_scp, const char *cv_scp, const char *transf, const char *network)
{
    GlobalOption *option = (GlobalOption*)calloc(1, sizeof(GlobalOption));

    if (NULL == option)
        return NULL;

    option->batch_size = batch_size;
    option->lrate = lrate;
    option->bp = bp;

    /* sizeof(field) is the capacity only if the fields are char arrays.
     * They must be embedded storage, since the original copied into them
     * straight after malloc() - TODO confirm against the struct declaration. */
    GlobalOption_setString(option->tr_scp, sizeof(option->tr_scp), tr_scp);
    GlobalOption_setString(option->cv_scp, sizeof(option->cv_scp), cv_scp);
    GlobalOption_setString(option->transf, sizeof(option->transf), transf);
    GlobalOption_setString(option->network, sizeof(option->network), network);

    option->refcount = 1;
    return option;
}
/*
 * Reinterpret `id` (an address cast to long, such as the value returned by
 * GlobalOption_id()) as a GlobalOption pointer and atomically take one more
 * reference on it. `id` is not validated.
 */
GlobalOption* GlobalOption_newWithId(long id)
{
    GlobalOption *shared = (GlobalOption*)id;

    __sync_add_and_fetch(&shared->refcount, 1);
    return shared;
}
/* Return the address of `option` converted to long, usable as an opaque handle. */
long GlobalOption_id(GlobalOption *option)
{
    long handle = (long)option;

    return handle;
}
/*
 * Drop one reference to `option` and release its memory when that was the
 * last one. A NULL argument is ignored.
 */
void GlobalOption_free(GlobalOption *option)
{
    if (NULL == option)
        return;

    /* The builtin returns the pre-decrement value: 1 means last reference. */
    if (__sync_fetch_and_add(&option->refcount, -1) != 1)
        return;

    free(option);
}