#include "ModelSync.h" #include "../../nerv/lib/matrix/cuda_helper.h" #include "../../nerv/lib/matrix/generic/elem_type.h" #include "common.h" #include ModelSync* ModelSync_new(void) { ModelSync *self = (ModelSync*)malloc(sizeof(ModelSync)); if (NULL != self) { self->model_mutex = THMutex_new(); self->state_mutex = THMutex_new(); self->initialized_ = false; self->dim_ = 0; self->pos_ = 0; self->data_ = NULL; self->free_data_ = NULL; self->data_ = NULL; self->refcount = 1; self->threadcount = 0; } return self; } ModelSync* ModelSync_newWithId(long id) { ModelSync *self = (ModelSync*)id; __sync_fetch_and_add(&self->refcount, 1); return self; } long ModelSync_id(ModelSync *self) { return (long)(self); } int ModelSync_lockmodel(ModelSync *self) { if(THMutex_lock(self->model_mutex)) return 1; return 0; } int ModelSync_unlockmodel(ModelSync *self) { if(THMutex_unlock(self->model_mutex)) return 1; return 0; } int ModelSync_lockstate(ModelSync *self) { if(THMutex_lock(self->state_mutex)) return 1; return 0; } int ModelSync_unlockstate(ModelSync *self) { if(THMutex_unlock(self->state_mutex)) return 1; return 0; } int ModelSync_free(ModelSync *self) { if (NULL != self && __sync_fetch_and_add(&self->refcount, -1) == 1) { free(self->model_mutex); free(self->state_mutex); Status status; CUDA_SAFE_SYNC_CALL(cudaFreeHost(self->free_data_), &status); free(self); } } int ModelSync_initBuffer(ModelSync *self) { if (NULL != self) { void *free_data = NULL, *data = NULL; size_t size = self->dim_ * sizeof(float)+16; Status status; CUDA_SAFE_SYNC_CALL(cudaHostAlloc((void**) &free_data, size, cudaHostAllocPortable), &status); NERV_SET_STATUS(&status, NERV_NORMAL, 0); data = (free_data ? (void *)( (((unsigned long)*(&free_data)) + 15) & ~0xFUL ) : NULL) ; if (NULL != data) { self->data_ = (float*)(data); self->free_data_ = (float*)(free_data); } return 0; } return 1; } int ModelSync_weightfromd(ModelSync *self, Matrix *dm) { if (NULL != self && NULL != dm) { void *host_data_ = (void*)self->data_; size_t width = dm->ncol * sizeof(float); size_t src_pitch = dm->stride; size_t dst_pitch = src_pitch; Status status; CUDA_SAFE_SYNC_CALL(cudaMemcpy2D(host_data_+self->pos_, dst_pitch, dm->data.f, src_pitch, width, dm->nrow, cudaMemcpyDeviceToHost), &status); NERV_SET_STATUS(&status, NERV_NORMAL, 0); self->pos_ += dm->nrow * dm->stride; return 0; } return 1; } int ModelSync_weighttod(ModelSync *self, Matrix *dm) { if (NULL != self && NULL != dm) { void *host_data_ = (void*)self->data_; size_t width = dm->ncol * sizeof(float); size_t dst_pitch = dm->stride; size_t src_pitch = dst_pitch; Status status; CUDA_SAFE_SYNC_CALL(cudaMemcpy2D(dm->data.f, dst_pitch, host_data_+self->pos_, src_pitch, width, dm->nrow, cudaMemcpyHostToDevice), &status); NERV_SET_STATUS(&status, NERV_NORMAL, 0); self->pos_ += dm->nrow * dm->stride; self->initialized_ = true; return 0; } return 1; } void ModelSync_syncinc(ModelSync *self) { __sync_fetch_and_add(&self->threadcount, 1); } void ModelSync_syncdec(ModelSync *self) { __sync_fetch_and_add(&self->threadcount, -1); } int ModelSync_threadcount(ModelSync *self) { return self->threadcount; } ///////////////////////////////// Xent* Xent_new() { Xent *xent = (Xent*)malloc(sizeof(Xent)); memset(xent, 0, sizeof(Xent)); xent->refcount = 1; return xent; } Xent* Xent_newWithId(long id) { Xent *xent = (Xent*)id; __sync_fetch_and_add(&xent->refcount, 1); return xent; } Xent* Xent_newWithParm(size_t frames_, size_t correct_, double loss_, double entropy_) { Xent *xent = (Xent*)malloc(sizeof(Xent)); 
    xent->frames_ = frames_;
    xent->correct_ = correct_;
    xent->loss_ = loss_;
    xent->entropy_ = entropy_;
    xent->refcount = 1;
    return xent;
}

long Xent_id(Xent *xent)
{
    return (long)(xent);
}

Xent* Xent_add(Xent *a, Xent *b)
{
    a->frames_ += b->frames_;
    a->correct_ += b->correct_;
    a->loss_ += b->loss_;
    a->entropy_ += b->entropy_;
    return a;
}

void Xent_free(Xent *xent)
{
    if (NULL != xent && __sync_fetch_and_add(&xent->refcount, -1) == 1) {
        free(xent);
        xent = NULL;
    }
}

//////////////////////////////////

Mse* Mse_new()
{
    Mse *mse = (Mse*)malloc(sizeof(Mse));
    memset(mse, 0, sizeof(Mse));
    mse->refcount = 1;
    return mse;
}

Mse* Mse_newWithId(long id)
{
    Mse *mse = (Mse*)id;
    __sync_fetch_and_add(&mse->refcount, 1);
    return mse;
}

Mse* Mse_newWithParm(size_t frames_, double loss_)
{
    Mse *mse = (Mse*)malloc(sizeof(Mse));
    mse->frames_ = frames_;
    mse->loss_ = loss_;
    mse->refcount = 1;
    return mse;
}

long Mse_id(Mse *mse)
{
    return (long)(mse);
}

Mse* Mse_add(Mse *a, Mse *b)
{
    a->frames_ += b->frames_;
    a->loss_ += b->loss_;
    return a;
}

void Mse_free(Mse *mse)
{
    if (NULL != mse && __sync_fetch_and_add(&mse->refcount, -1) == 1) {
        free(mse);
        mse = NULL;
    }
}

//////////////////////////////////

GlobalOption* GlobalOption_new()
{
    GlobalOption *option = (GlobalOption*)malloc(sizeof(GlobalOption));
    option->refcount = 1;
    return option;
}

GlobalOption* GlobalOption_newWithParm(int batch_size, float lrate, bool bp,
                                       const char *tr_scp, const char *cv_scp,
                                       const char *transf, const char *network)
{
    GlobalOption *option = (GlobalOption*)malloc(sizeof(GlobalOption));
    option->batch_size = batch_size;
    option->lrate = lrate;
    option->bp = bp;
    /* the destination fields are assumed to be large enough to hold the
       full strings, including the terminating NUL */
    strncpy(option->tr_scp, tr_scp, strlen(tr_scp) + 1);
    strncpy(option->cv_scp, cv_scp, strlen(cv_scp) + 1);
    strncpy(option->transf, transf, strlen(transf) + 1);
    strncpy(option->network, network, strlen(network) + 1);
    option->refcount = 1;
    return option;
}

GlobalOption* GlobalOption_newWithId(long id)
{
    GlobalOption *option = (GlobalOption*)id;
    __sync_fetch_and_add(&option->refcount, 1);
    return option;
}

long GlobalOption_id(GlobalOption *option)
{
    return (long)(option);
}

void GlobalOption_free(GlobalOption *option)
{
    if (NULL != option && __sync_fetch_and_add(&option->refcount, -1) == 1) {
        free(option);
        option = NULL;
    }
}
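
/*
 * Usage sketch (illustrative only, not part of the original API surface,
 * compiled only when MODELSYNC_USAGE_EXAMPLE is defined): a hypothetical
 * worker thread that shares one ModelSync instance by id. It bumps the
 * refcount on entry, registers itself via syncinc, serializes access to
 * the shared weight buffer with the model lock, and drops its reference
 * on exit. The actual weight transfers (ModelSync_weightfromd /
 * ModelSync_weighttod against the thread's device matrices) are elided.
 */
#ifdef MODELSYNC_USAGE_EXAMPLE
static void *example_worker(void *arg)
{
    /* take a per-thread reference to the shared synchronizer */
    ModelSync *sync = ModelSync_newWithId((long)arg);
    ModelSync_syncinc(sync);

    ModelSync_lockmodel(sync);
    /* ... pull weights to the device, train a mini-batch, push them back ... */
    ModelSync_unlockmodel(sync);

    ModelSync_syncdec(sync);
    ModelSync_free(sync);   /* drop this thread's reference */
    return NULL;
}
#endif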