#include <sys/stat.h>
#include <sys/types.h>

#include "GIntervalsBigSet2D.h"
#include "rdbutils.h"

//------------------------------------- GIntervalsBigSet2D --------------------------------------

// Column names of the per-chromosome-pair statistics table of a big 2D
// intervals set. init() validates the names found in the meta data against
// this table (position by position) and end_save() writes them out.
const char *GIntervalsBigSet2D::STAT_COL_NAMES[NUM_STAT_COLS] = {
	"chrom1",
	"chrom2",
	"contains_overlaps",
	"size",
	"surface"
};

// Initializes the object from the meta data of a big 2D intervals set.
//
//   intervset - name of the intervals set; forwarded to the base-class init and
//               used in error messages
//   meta      - R meta data object; its first element is read as the statistics
//               table (one row per chromosome pair, columns as listed in
//               STAT_COL_NAMES)
//   iu        - forwarded to the base-class init
//
// Resets all per-pair tables and the iteration state, then fills the per-pair
// size / surface / contains-overlaps tables and the totals from the statistics
// table. Calls verror() if the meta data is not 2D or the statistics table does
// not have the expected layout.
void GIntervalsBigSet2D::init(const char *intervset, SEXP meta, const IntervUtils &iu)
{
	GIntervalsBigSet::init(intervset, iu);

	if (!is2d(meta))
		verror("Intervals set %s: expecting 2D intervals", intervset);

	// One table slot per ordered (chrom1, chrom2) pair.
	const size_t num_chrom_pairs = m_iu->get_chromkey().get_num_chroms() * m_iu->get_chromkey().get_num_chroms();

	m_size = 0;
	m_surface = 0;
	m_chroms2size.clear();
	m_contains_overlaps.clear();
	m_surfaces.clear();
	m_chroms2size.resize(num_chrom_pairs, 0);
	m_contains_overlaps.resize(num_chrom_pairs, false);
	m_surfaces.resize(num_chrom_pairs, 0);

	// Iteration starts in the "past the last pair" state with nothing loaded.
	m_cur_chromid = m_chroms2size.size();
	m_iter_chromid = -1;
	m_iter_index = 0;
	m_iter_chrom_index = 0;
	m_do_sort = false;
	m_iinterval = m_intervals.end();

	if (!isVector(meta) || length(meta) < 1)
		verror("Invalid format of intervals set %s", intervset);

	SEXP stat = VECTOR_ELT(meta, 0);
	SEXP colnames = getAttrib(stat, R_NamesSymbol);

	if (length(stat) != NUM_STAT_COLS || !isString(colnames) || length(colnames) != NUM_STAT_COLS || strcmp(CHAR(STRING_ELT(colnames, 0)), STAT_COL_NAMES[0]))
		verror("Invalid format of intervals set %s", intervset);

	// Every column must carry the expected name and have the same length as its neighbour.
	for (int i = 1; i < NUM_STAT_COLS; ++i) {
		if (length(VECTOR_ELT(stat, i - 1)) != length(VECTOR_ELT(stat, i)) || strcmp(CHAR(STRING_ELT(colnames, i)), STAT_COL_NAMES[i]))
			verror("Invalid format of intervals set %s", intervset);
	}

	SEXP chroms1 = VECTOR_ELT(stat, CHROM1_COL);
	SEXP chrom_levels1 = getAttrib(chroms1, R_LevelsSymbol);
	SEXP chroms2 = VECTOR_ELT(stat, CHROM2_COL);
	SEXP chrom_levels2 = getAttrib(chroms2, R_LevelsSymbol);
	SEXP sizes = VECTOR_ELT(stat, SIZE_COL);
	SEXP surfaces = VECTOR_ELT(stat, SURFACE_COL);
	SEXP contains_overlaps = VECTOR_ELT(stat, CONTAINS_OVERLAPS_COL);

	// Chromosome columns may be plain strings or factors (1-based codes into the levels).
	const bool chroms1_are_strings = isString(chroms1);
	const bool chroms2_are_strings = isString(chroms2);
	const bool sizes_are_real = isReal(sizes);
	const int num_rows = length(sizes);

	for (int i = 0; i < num_rows; ++i) {
		const char *chrom1 = chroms1_are_strings ? CHAR(STRING_ELT(chroms1, i)) : CHAR(STRING_ELT(chrom_levels1, INTEGER(chroms1)[i] - 1));
		const char *chrom2 = chroms2_are_strings ? CHAR(STRING_ELT(chroms2, i)) : CHAR(STRING_ELT(chrom_levels2, INTEGER(chroms2)[i] - 1));
		int chromid1 = m_iu->chrom2id(chrom1);
		int chromid2 = m_iu->chrom2id(chrom2);
		int64_t size = (int64_t)(sizes_are_real ? REAL(sizes)[i] : INTEGER(sizes)[i]);
		double surface = REAL(surfaces)[i];
		int idx = chroms2idx(chromid1, chromid2);

		m_chroms2size[idx] = size;
		m_surfaces[idx] = surface;
		m_contains_overlaps[idx] = LOGICAL(contains_overlaps)[i];
		m_size += (size_t)size;
		m_surface += surface;
	}

	// Unmodified copy of the sizes; load_chrom() uses it to compute udata offsets.
	m_orig_chroms2size = m_chroms2size;
}

// Makes m_intervals hold the intervals of the chromosome pair (chromid1, chromid2)
// and resets the per-pair iteration index.
//
// - If the pair has no intervals, m_intervals is emptied.
// - If m_intervals already holds this pair (judged by its first interval), it is
//   kept as is instead of being re-read from disk.
// - Otherwise the pair's file "<intervset path>/<chrom1>-<chrom2>" is
//   unserialized and converted, udata of every interval is set to its ordinal
//   number, and the intervals are sorted if sort() was requested.
void GIntervalsBigSet2D::load_chrom(int chromid1, int chromid2)
{
	m_iter_chrom_index = 0;

	if (!get_num_intervals(chromid1, chromid2)) {
		m_intervals.clear();
		return;
	}

	// Already loaded: keep the cached intervals. (Previously this case fell into
	// the "no intervals" branch and wiped the cache.)
	if (!m_intervals.empty() && m_intervals.front().chromid1() == chromid1 && m_intervals.front().chromid2() == chromid2)
		return;

	string filename = interv2path(m_iu->get_env(), m_intervset);
	filename += "/";
	filename += m_iu->id2chrom(chromid1);
	filename += "-";
	filename += m_iu->id2chrom(chromid2);

	SEXP rintervals = RSaneUnserialize(filename.c_str());
	rprotect(rintervals);
	m_iu->convert_rintervs(rintervals, NULL, &m_intervals);
	runprotect(rintervals);

	// udata = number of intervals in all preceding pairs (per the original,
	// unmasked sizes) + position of the interval inside this pair.
	size_t ordinal = 0;
	const int pair_idx = chroms2idx(chromid1, chromid2);
	for (int i = 0; i < pair_idx; ++i)
		ordinal += m_orig_chroms2size[i];

	for (GIntervals2D::iterator iinterval = m_intervals.begin(); iinterval != m_intervals.end(); ++iinterval, ++ordinal)
		iinterval->udata() = (void *)(intptr_t)ordinal;

	// Sorting is applied after udata is assigned, so udata keeps the on-disk order.
	if (m_do_sort)
		m_intervals.sort(m_compare);
}

// Computes the statistics of intervals that all belong to one chromosome pair.
//
// Returns the chromosome pair together with its statistics (number of intervals,
// surface, whether overlaps exist). For an empty set the result is
// ChromPair(-1, -1) with a default-constructed ChromStat. Calls verror() if the
// intervals span more than one chromosome pair. Exceptions thrown by
// verify_no_overlaps() other than OVERLAPPING_INTERVAL are propagated.
//
// Note: calls begin_iter() on `intervals`, i.e. the fetcher's iteration state
// is changed.
pair<ChromPair, GIntervalsBigSet2D::ChromStat> GIntervalsBigSet2D::get_chrom_stat(GIntervalsFetcher2D *intervals, const IntervUtils &iu)
{
	pair<ChromPair, ChromStat> result(ChromPair(-1, -1), ChromStat());

	if (!intervals->size())
		return result;

	if (intervals->num_chrom_pairs() > 1)
		verror("get_chrom_stat found more than one chromosome pair in the intervals");

	// The first interval identifies the (single) chromosome pair.
	intervals->begin_iter();
	result.first.chromid1 = intervals->cur_interval().chromid1();
	result.first.chromid2 = intervals->cur_interval().chromid2();

	result.second.size = intervals->size();
	result.second.surface = intervals->surface();

	// verify_no_overlaps() signals overlaps by throwing; translate that into a flag.
	try {
		intervals->verify_no_overlaps(iu.get_chromkey());
		result.second.contains_overlaps = false;
	} catch (TGLException &e) {
		if (e.code() != GIntervalsFetcher2D::OVERLAPPING_INTERVAL)
			throw;
		result.second.contains_overlaps = true;
	}

	return result;
}

void GIntervalsBigSet2D::begin_save(const char *intervset, const IntervUtils &iu, vector<ChromStat> &chromstats)
{
	string path = interv2path(iu.get_env(), intervset);
	if (mkdir(path.c_str(), 0777))
		verror("Cannot create intervals directory at %s: %s", path.c_str(), strerror(errno));

	chromstats.clear();
	chromstats.resize(iu.get_chromkey().get_num_chroms() * iu.get_chromkey().get_num_chroms());
}

void GIntervalsBigSet2D::save_chrom_plain_intervals(const char *intervset, GIntervals2D &intervals, const IntervUtils &iu, vector<ChromStat> &chromstats)
{
	if (intervals.size()) {
		SEXP rintervals = iu.convert_intervs(&intervals);
		save_chrom(intervset, &intervals, rintervals, iu, chromstats);
		intervals.clear();
	}
}

void GIntervalsBigSet2D::save_chrom(const char *intervset, GIntervalsFetcher2D *intervals, SEXP rintervals, const IntervUtils &iu, vector<ChromStat> &chromstats)
{
	if (!intervals->size()) 
		return;

	pair<ChromPair, ChromStat> res = get_chrom_stat(intervals, iu);
	ChromPair &chrompair = res.first;
	ChromStat &chromstat = res.second;	
	chromstats[chrompair.chromid1 * iu.get_chromkey().get_num_chroms() + chrompair.chromid2] = chromstat;

	string filename = interv2path(iu.get_env(), intervset);
	filename += "/";
	filename += iu.id2chrom(chrompair.chromid1);
	filename += "-";
	filename += iu.id2chrom(chrompair.chromid2);
	RSaneSerialize(rintervals, filename.c_str());
}

void GIntervalsBigSet2D::end_save_plain_intervals(const char *intervset, const IntervUtils &iu, const vector<ChromStat> &chromstats)
{
	GIntervals2D intervals;
	SEXP zeroline = iu.convert_intervs(&intervals, GInterval2D::NUM_COLS, false);
	end_save(intervset, zeroline, iu, chromstats);
}

void GIntervalsBigSet2D::end_save(const char *intervset, SEXP zeroline, const IntervUtils &iu, const vector<ChromStat> &chromstats)
{
	size_t num_intervals = 0;
	for (vector<ChromStat>::const_iterator istat = chromstats.begin(); istat < chromstats.end(); ++istat)
		num_intervals += istat->size;

	SEXP rstat;
	SEXP colnames;
	SEXP rownames;
	SEXP chroms1, chroms2, chroms_idx1, chroms_idx2;

	rprotect(rstat = allocVector(VECSXP, NUM_STAT_COLS));

	setAttrib(rstat, R_NamesSymbol, (colnames = allocVector(STRSXP, NUM_STAT_COLS)));
	setAttrib(rstat, R_ClassSymbol, mkString("data.frame"));

	for (int i = 0; i < NUM_STAT_COLS; i++)
		SET_STRING_ELT(colnames, i, mkChar(STAT_COL_NAMES[i]));

	int num_nonempty_chroms = 0;
	for (vector<ChromStat>::const_iterator ichromstat = chromstats.begin(); ichromstat != chromstats.end(); ++ichromstat) {
		if (ichromstat->size) 
			++num_nonempty_chroms;
	}

	SET_VECTOR_ELT(rstat, CHROM1_COL, (chroms_idx1 = allocVector(INTSXP, num_nonempty_chroms)));
	SET_VECTOR_ELT(rstat, CHROM2_COL, (chroms_idx2 = allocVector(INTSXP, num_nonempty_chroms)));
	SET_VECTOR_ELT(rstat, SIZE_COL, allocVector(REALSXP, num_nonempty_chroms));
	SET_VECTOR_ELT(rstat, SURFACE_COL, allocVector(REALSXP, num_nonempty_chroms));
	SET_VECTOR_ELT(rstat, CONTAINS_OVERLAPS_COL, allocVector(LGLSXP, num_nonempty_chroms));

	setAttrib(rstat, R_RowNamesSymbol, (rownames = allocVector(INTSXP, num_nonempty_chroms)));
	setAttrib(chroms_idx1, R_LevelsSymbol, (chroms1 = allocVector(STRSXP, iu.get_chromkey().get_num_chroms())));
	setAttrib(chroms_idx2, R_LevelsSymbol, (chroms2 = allocVector(STRSXP, iu.get_chromkey().get_num_chroms())));
	setAttrib(chroms_idx1, R_ClassSymbol, mkString("factor"));
	setAttrib(chroms_idx2, R_ClassSymbol, mkString("factor"));

	for (unsigned id = 0; id < (unsigned)iu.get_chromkey().get_num_chroms(); ++id) {
		SET_STRING_ELT(chroms1, id, mkChar(iu.id2chrom(id).c_str()));
		SET_STRING_ELT(chroms2, id, mkChar(iu.id2chrom(id).c_str()));
	}

	int res_index = 0;
	for (int chromid1 = 0; chromid1 < iu.get_chromkey().get_num_chroms(); ++chromid1) {
		for (int chromid2 = 0; chromid2 < iu.get_chromkey().get_num_chroms(); ++chromid2) {
			const ChromStat &chromstat = chromstats[chromid1 * iu.get_chromkey().get_num_chroms() + chromid2];

			if (!chromstat.size) 
				continue;

			INTEGER(chroms_idx1)[res_index] = chromid1 + 1;
			INTEGER(chroms_idx2)[res_index] = chromid2 + 1;
			REAL(VECTOR_ELT(rstat, SIZE_COL))[res_index] = chromstat.size;
			REAL(VECTOR_ELT(rstat, SURFACE_COL))[res_index] = chromstat.surface;
			LOGICAL(VECTOR_ELT(rstat, CONTAINS_OVERLAPS_COL))[res_index] = chromstat.contains_overlaps;
			INTEGER(rownames)[res_index] = res_index + 1;
			++res_index;
		}
	}

	save_meta(intervset, rstat, zeroline, iu);
}

// Creates a new GIntervalsBigSet2D that refers to the same intervals set but
// exposes only the chromosome pairs listed in `chrompairs_mask`: sizes,
// surfaces and overlap flags of all other pairs are left zero / false and do
// not contribute to the totals. The original (unmasked) sizes are copied as is.
// If sorting was requested on this object, the same comparison is requested on
// the copy. The object is allocated with `new` and returned as a raw pointer.
GIntervalsFetcher2D *GIntervalsBigSet2D::create_masked_copy(const set<ChromPair> &chrompairs_mask) const
{
	GIntervalsBigSet2D *copy = new GIntervalsBigSet2D();

	// identity of the set
	copy->m_intervset = m_intervset;
	copy->m_iu = m_iu;

	// per-pair tables start out empty (all pairs masked)
	copy->m_size = 0;
	copy->m_surface = 0;
	copy->m_chroms2size.clear();
	copy->m_contains_overlaps.clear();
	copy->m_chroms2size.resize(m_chroms2size.size(), 0);
	copy->m_contains_overlaps.resize(m_contains_overlaps.size(), false);
	copy->m_surfaces.resize(m_surfaces.size(), 0);

	// iteration state: nothing loaded, positioned past the last pair
	copy->m_cur_chromid = copy->m_chroms2size.size();
	copy->m_iter_chromid = -1;
	copy->m_iter_index = 0;
	copy->m_iter_chrom_index = 0;
	copy->m_do_sort = false;
	copy->m_iinterval = copy->m_intervals.end();
	copy->m_orig_chroms2size = m_orig_chroms2size;

	// copy over the statistics of the pairs that pass the mask
	for (int idx = 0; idx < copy->m_chroms2size.size(); ++idx) {
		const ChromPair chroms(idx2chrom1(idx), idx2chrom2(idx));

		if (!chrompairs_mask.count(chroms))
			continue;

		copy->m_chroms2size[idx] = m_chroms2size[idx];
		copy->m_contains_overlaps[idx] = m_contains_overlaps[idx];
		copy->m_surfaces[idx] = m_surfaces[idx];
		copy->m_size += (size_t)m_chroms2size[idx];
		copy->m_surface += m_surfaces[idx];
	}

	if (m_do_sort)
		copy->sort(m_compare);

	return copy;
}

// Starts an iteration over all intervals of the set: drops whatever is loaded,
// then loads the first chromosome pair that has intervals and positions the
// interval iterator at its beginning. If no pair has intervals, m_cur_chromid
// ends up equal to the number of pairs and nothing is loaded.
void GIntervalsBigSet2D::begin_iter()
{
	m_iter_chromid = -1;
	m_iter_index = 0;
	m_iter_chrom_index = 0;

	m_intervals.clear();
	// clear() invalidates the old iterator; keep it valid even if nothing gets
	// loaded below (same end state as the fall-through of begin_chrom_iter()).
	m_iinterval = m_intervals.end();

	for (m_cur_chromid = 0; m_cur_chromid < (int)m_chroms2size.size(); ++m_cur_chromid) {
		if (!m_chroms2size[m_cur_chromid])
			continue;

		load_chrom(idx2chrom1(m_cur_chromid), idx2chrom2(m_cur_chromid));
		m_iinterval = m_intervals.begin();
		return;
	}
}

// Starts an iteration restricted to the chromosome pair (chromid1, chromid2).
//
// m_iter_index is set to the number of intervals in all pairs that precede the
// requested one. If the pair has intervals they are loaded and the interval
// iterator is positioned at the first of them. Otherwise (the pair is empty or
// its index is out of range) the loaded intervals are dropped and the iterator
// is set to end(), so that no intervals of a previously loaded pair remain
// visible.
void GIntervalsBigSet2D::begin_chrom_iter(int chromid1, int chromid2)
{
	const int target_chromid = chroms2idx(chromid1, chromid2);

	m_iter_chromid = target_chromid;
	m_iter_index = 0;
	m_iter_chrom_index = 0;

	for (m_cur_chromid = 0; m_cur_chromid < (int)m_chroms2size.size(); ++m_cur_chromid) {
		if (m_cur_chromid == target_chromid) {
			if (m_chroms2size[m_cur_chromid]) {
				load_chrom(chromid1, chromid2);
				m_iinterval = m_intervals.begin();
				return;
			}
			// Empty pair: fall through to the reset below. Previously the function
			// returned here and left the old intervals and iterator in place.
			break;
		}
		m_iter_index += m_chroms2size[m_cur_chromid];
	}

	m_intervals.clear();
	m_iinterval = m_intervals.end();
}

// Requests sorting by `compare`: the comparison is remembered so that
// load_chrom() sorts every chromosome pair it loads from now on, and the
// currently loaded intervals (if any) are sorted right away.
void GIntervalsBigSet2D::sort(Compare_t compare)
{
	m_compare = compare;
	m_do_sort = true;

	if (!m_intervals.size())
		return;

	m_intervals.sort(m_compare);
}

// Raises an OVERLAPPING_INTERVAL error (via TGLError) if any chromosome pair of
// the set is flagged as containing overlapping intervals. The check relies on
// the stored per-pair flags only; `chromkey` is not used here.
void GIntervalsBigSet2D::verify_no_overlaps(const GenomeChromKey &chromkey, const char *error_prefix) const
{
	for (size_t pair_idx = 0; pair_idx < m_contains_overlaps.size(); ++pair_idx) {
		if (!m_contains_overlaps[pair_idx])
			continue;

		TGLError<GIntervalsFetcher2D>(OVERLAPPING_INTERVAL, "%sIntervals set %s contains overlapping intervals", error_prefix, m_intervset.c_str());
	}
}
