#include <sys/stat.h>
#include <sys/types.h>

#include "GIntervalsBigSet1D.h"
#include "rdbutils.h"

//------------------------------------- GIntervalsBigSet1D --------------------------------------

// Names of the columns of the per-chromosome statistics data frame of a big intervals set.
// init() requires the stored frame's names to equal these entries position by position,
// and end_save() writes them out as the frame's names.
const char *GIntervalsBigSet1D::STAT_COL_NAMES[NUM_STAT_COLS] = {
	"chrom",
	"contains_overlaps",
	"size",
	"unified_overlap_size",
	"unified_touching_size",
	"range",
	"unified_overlap_range"
};

void GIntervalsBigSet1D::init(const char *intervset, SEXP meta, const IntervUtils &iu)
{
	GIntervalsBigSet::init(intervset, iu);

	if (!is1d(meta)) 
		verror("Intervals set %s: expecting 1D intervals", intervset);

	m_size = 0;
	m_range = 0;
	m_contains_overlaps = false;
	m_user_chrom2size = &m_chrom2size;
	m_chrom2size.clear();
	m_chrom2unified_overlap_size.clear();
	m_chrom2unified_touching_size.clear();
	m_chrom2range.clear();
	m_chrom2unified_overlap_range.clear();
	m_chrom2size.resize(m_iu->get_chromkey().get_num_chroms(), 0);
	m_chrom2unified_overlap_size.resize(m_iu->get_chromkey().get_num_chroms(), 0);
	m_chrom2unified_touching_size.resize(m_iu->get_chromkey().get_num_chroms(), 0);
	m_chrom2range.resize(m_iu->get_chromkey().get_num_chroms(), 0);
	m_chrom2unified_overlap_range.resize(m_iu->get_chromkey().get_num_chroms(), 0);
	m_cur_chromid = m_chrom2size.size();
	m_iter_chrom = -1;
	m_iter_index = 0;
	m_iter_chrom_index = 0;
	m_do_sort = false;
	m_do_unify_overlaps = false;
	m_iinterval = m_intervals.end();

	if (!isVector(meta) || length(meta) < 1) 
		verror("Invalid format of intervals set %s", intervset);

	SEXP stat = VECTOR_ELT(meta, 0);
	SEXP colnames = getAttrib(stat, R_NamesSymbol);

	if (length(stat) != NUM_STAT_COLS || !isString(colnames) || length(colnames) != NUM_STAT_COLS || strcmp(CHAR(STRING_ELT(colnames, 0)), STAT_COL_NAMES[0]))
		verror("Invalid format of intervals set %s", intervset);

	for (int i = 1; i < NUM_STAT_COLS; ++i) {
		if (length(VECTOR_ELT(stat, i - 1)) != length(VECTOR_ELT(stat, i)) || strcmp(CHAR(STRING_ELT(colnames, i)), STAT_COL_NAMES[i]))
			verror("Invalid format of intervals set %s", intervset);
	}

	SEXP chroms = VECTOR_ELT(stat, CHROM_COL);
	SEXP chrom_levels = getAttrib(chroms, R_LevelsSymbol);
	SEXP sizes = VECTOR_ELT(stat, SIZE_COL);
	SEXP unified_overlap_sizes = VECTOR_ELT(stat, UNIFIED_OVERLAP_SIZE_COL);
	SEXP unified_touching_sizes = VECTOR_ELT(stat, UNIFIED_TOUCHING_SIZE_COL);
	SEXP ranges = VECTOR_ELT(stat, RANGE_COL);
	SEXP unified_overlap_ranges = VECTOR_ELT(stat, UNIFIED_OVERLAP_RANGE_COL);
	SEXP contains_overlaps = VECTOR_ELT(stat, CONTAINS_OVERLAPS_COL);

	for (int i = 0; i < length(sizes); ++i) {
		const char *chrom = isString(chroms) ? CHAR(STRING_ELT(chroms, i)) : CHAR(STRING_ELT(chrom_levels, INTEGER(chroms)[i] - 1));
		int chromid = m_iu->chrom2id(chrom);
		int64_t size = (int64_t)(isReal(sizes) ? REAL(sizes)[i] : INTEGER(sizes)[i]);
		int64_t unified_overlap_size = (int64_t)(isReal(unified_overlap_sizes) ? REAL(unified_overlap_sizes)[i] : INTEGER(unified_overlap_sizes)[i]);
		int64_t unified_touching_size = (int64_t)(isReal(unified_touching_sizes) ? REAL(unified_touching_sizes)[i] : INTEGER(unified_touching_sizes)[i]);
		int64_t range = (int64_t)(isReal(ranges) ? REAL(ranges)[i] : INTEGER(ranges)[i]);
		int64_t unified_overlap_range = (int64_t)(isReal(unified_overlap_ranges) ? REAL(unified_overlap_ranges)[i] : INTEGER(unified_overlap_ranges)[i]);

		m_chrom2size[chromid] = size;
		m_chrom2unified_overlap_size[chromid] = unified_overlap_size;
		m_chrom2unified_touching_size[chromid] = unified_touching_size;
		m_chrom2range[chromid] = range;
		m_chrom2unified_overlap_range[chromid] = unified_overlap_range;
		m_size += (size_t)size;
		m_range += (size_t)range;
		m_contains_overlaps |= LOGICAL(contains_overlaps)[i];
	}

	m_orig_chrom2size = m_chrom2size;
}

// Makes m_intervals hold the intervals of chromosome `chromid` and resets the
// per-chromosome iteration index to zero.
//
// - If get_num_intervals() reports no intervals for the chromosome, m_intervals is emptied.
// - If m_intervals is non-empty and its first interval already belongs to `chromid`, the
//   loaded intervals are kept untouched and nothing is read from disk.
// - Otherwise the file "<intervals set path>/<chromosome name>" is unserialized and
//   converted into m_intervals. Each interval's udata is set to its ordinal position in the
//   stored set (intervals of all preceding chromosomes, as counted by m_orig_chrom2size,
//   plus its index within the chromosome file). The pending sort and overlap unification,
//   if requested earlier through sort() / unify_overlaps(), are applied afterwards.
void GIntervalsBigSet1D::load_chrom(int chromid)
{
	m_iter_chrom_index = 0;

	if (!get_num_intervals(chromid)) {
		m_intervals.clear();
		return;
	}

	// The requested chromosome is already in memory: keep it. Clearing it here would leave
	// the caller iterating over an empty container although the chromosome has intervals.
	if (!m_intervals.empty() && m_intervals.front().chromid == chromid)
		return;

	string filename = interv2path(m_iu->get_env(), m_intervset);
	filename += "/";
	filename += m_iu->id2chrom(chromid);

	SEXP rintervals = RSaneUnserialize(filename.c_str());
	rprotect(rintervals);
	m_iu->convert_rintervs(rintervals, &m_intervals, NULL);
	runprotect(rintervals);

	// set udata: done before sorting / unifying, i.e. while the intervals are still in the
	// order in which they were read from the file
	size_t offset = 0;
	for (int i = 0; i < chromid; ++i)
		offset += m_orig_chrom2size[i];
	for (GIntervals::iterator iinterval = m_intervals.begin(); iinterval < m_intervals.end(); ++iinterval)
		iinterval->udata = (void *)(intptr_t)(iinterval - m_intervals.begin() + offset);

	if (m_do_sort)
		m_intervals.sort(m_compare);

	if (m_do_unify_overlaps)
		m_intervals.unify_overlaps(m_unify_touching_intervals);
}

// Computes the statistics of a set of intervals that all lie on one chromosome.
//
// Returns a pair of (chromosome id, statistics). For an empty set the id is -1 and the
// statistics are left default-constructed. Reports an error through verror() if the
// intervals span more than one chromosome.
//
// The intervals are iterated from the beginning (the fetcher's iteration state is changed)
// and copied, so the unification performed here does not modify the caller's intervals.
pair<int, GIntervalsBigSet1D::ChromStat> GIntervalsBigSet1D::get_chrom_stat(GIntervalsFetcher1D *_intervals)
{
	pair<int, ChromStat> result;
	result.first = -1;

	if (!_intervals->size())
		return result;

	if (_intervals->num_chroms() > 1)
		verror("get_chrom_stat found more than one chromosome in the intervals");

	// private copy that is safe to unify in place
	GIntervals scratch;
	scratch.reserve(_intervals->size());

	_intervals->begin_iter();
	while (!_intervals->isend()) {
		scratch.push_back(_intervals->cur_interval());
		_intervals->next();
	}

	result.first = scratch.front().chromid;

	ChromStat &stat = result.second;

	// as given
	stat.size = scratch.size();
	stat.range = scratch.range();

	// after unifying overlapping intervals
	scratch.unify_overlaps(false);
	stat.unified_overlap_size = scratch.size();
	stat.unified_overlap_range = scratch.range();

	// after unifying touching intervals as well
	scratch.unify_overlaps(true);
	stat.unified_touching_size = scratch.size();

	// overlaps existed iff unifying them changed the number of intervals
	stat.contains_overlaps = stat.size != stat.unified_overlap_size;

	return result;
}

void GIntervalsBigSet1D::begin_save(const char *intervset, const IntervUtils &iu, vector<ChromStat> &chromstats)
{
	string path = interv2path(iu.get_env(), intervset);
	if (mkdir(path.c_str(), 0777))
		verror("Cannot create intervals directory at %s: %s", path.c_str(), strerror(errno));

	chromstats.clear();
	chromstats.resize(iu.get_chromkey().get_num_chroms());
}

// Saves the intervals of one chromosome given as a plain GIntervals container and then
// empties the container. Does nothing if the container is empty.
void GIntervalsBigSet1D::save_chrom_plain_intervals(const char *intervset, GIntervals &intervals, const IntervUtils &iu, vector<ChromStat> &chromstats)
{
	if (!intervals.size())
		return;

	// R representation of the intervals, which is what gets written to the chromosome file
	SEXP rintervals = iu.convert_intervs(&intervals);

	save_chrom(intervset, &intervals, rintervals, iu, chromstats);
	intervals.clear();
}

void GIntervalsBigSet1D::save_chrom(const char *intervset, GIntervalsFetcher1D *intervals, SEXP rintervals, const IntervUtils &iu, vector<ChromStat> &chromstats)
{
	if (!intervals->size()) 
		return;

	pair<int, ChromStat> res = get_chrom_stat(intervals);
	int &chromid = res.first;
	ChromStat &chromstat = res.second;
	chromstats[chromid] = chromstat;

	string filename = interv2path(iu.get_env(), intervset);
	filename += "/";
	filename += iu.id2chrom(chromid);
	RSaneSerialize(rintervals, filename.c_str());
}

void GIntervalsBigSet1D::end_save_plain_intervals(const char *intervset, const IntervUtils &iu, const vector<ChromStat> &chromstats)
{
	GIntervals intervals;
	SEXP zeroline = iu.convert_intervs(&intervals, GInterval::NUM_COLS, false);
	end_save(intervset, zeroline, iu, chromstats);
}

void GIntervalsBigSet1D::end_save(const char *intervset, SEXP zeroline, const IntervUtils &iu, const vector<ChromStat> &chromstats)
{
	size_t num_intervals = 0;
	for (vector<ChromStat>::const_iterator istat = chromstats.begin(); istat < chromstats.end(); ++istat)
		num_intervals += istat->size;

	SEXP rstat;
	SEXP colnames;
	SEXP rownames;
	SEXP chroms, chroms_idx;

	rprotect(rstat = allocVector(VECSXP, NUM_STAT_COLS));

	setAttrib(rstat, R_NamesSymbol, (colnames = allocVector(STRSXP, NUM_STAT_COLS)));
	setAttrib(rstat, R_ClassSymbol, mkString("data.frame"));

	for (int i = 0; i < NUM_STAT_COLS; i++)
		SET_STRING_ELT(colnames, i, mkChar(STAT_COL_NAMES[i]));

	int num_nonempty_chroms = 0;
	for (vector<ChromStat>::const_iterator ichromstat = chromstats.begin(); ichromstat != chromstats.end(); ++ichromstat) {
		if (ichromstat->size) 
			++num_nonempty_chroms;
	}

	SET_VECTOR_ELT(rstat, CHROM_COL, (chroms_idx = allocVector(INTSXP, num_nonempty_chroms)));
	SET_VECTOR_ELT(rstat, SIZE_COL, allocVector(REALSXP, num_nonempty_chroms));
	SET_VECTOR_ELT(rstat, UNIFIED_OVERLAP_SIZE_COL, allocVector(REALSXP, num_nonempty_chroms));
	SET_VECTOR_ELT(rstat, UNIFIED_TOUCHING_SIZE_COL, allocVector(REALSXP, num_nonempty_chroms));
	SET_VECTOR_ELT(rstat, RANGE_COL, allocVector(REALSXP, num_nonempty_chroms));
	SET_VECTOR_ELT(rstat, UNIFIED_OVERLAP_RANGE_COL, allocVector(REALSXP, num_nonempty_chroms));
	SET_VECTOR_ELT(rstat, CONTAINS_OVERLAPS_COL, allocVector(LGLSXP, num_nonempty_chroms));

	setAttrib(rstat, R_RowNamesSymbol, (rownames = allocVector(INTSXP, num_nonempty_chroms)));
	setAttrib(chroms_idx, R_LevelsSymbol, (chroms = allocVector(STRSXP, iu.get_chromkey().get_num_chroms())));
	setAttrib(chroms_idx, R_ClassSymbol, mkString("factor"));

	for (unsigned id = 0; id < (unsigned)iu.get_chromkey().get_num_chroms(); ++id)
		SET_STRING_ELT(chroms, id, mkChar(iu.id2chrom(id).c_str()));

	int res_index = 0;
	for (vector<ChromStat>::const_iterator ichromstat = chromstats.begin(); ichromstat != chromstats.end(); ++ichromstat) {
		if (!ichromstat->size) 
			continue;

		INTEGER(chroms_idx)[res_index] = ichromstat - chromstats.begin() + 1;
		REAL(VECTOR_ELT(rstat, SIZE_COL))[res_index] = ichromstat->size;
		REAL(VECTOR_ELT(rstat, UNIFIED_OVERLAP_SIZE_COL))[res_index] = ichromstat->unified_overlap_size;
		REAL(VECTOR_ELT(rstat, UNIFIED_TOUCHING_SIZE_COL))[res_index] = ichromstat->unified_touching_size;
		REAL(VECTOR_ELT(rstat, RANGE_COL))[res_index] = ichromstat->range;
		REAL(VECTOR_ELT(rstat, UNIFIED_OVERLAP_RANGE_COL))[res_index] = ichromstat->unified_overlap_range;
		LOGICAL(VECTOR_ELT(rstat, CONTAINS_OVERLAPS_COL))[res_index] = ichromstat->contains_overlaps;
		INTEGER(rownames)[res_index] = res_index + 1;
		++res_index;
	}

	save_meta(intervset, rstat, zeroline, iu);
}

// Creates a new GIntervalsBigSet1D that refers to the same intervals set but exposes only
// the chromosomes whose ids appear in `chromids_mask`; all other chromosomes appear empty.
//
// The copy starts with fresh iteration state and no loaded intervals. The sort and overlap
// unification settings of this object are re-applied to the copy.
//
// Returns an object allocated with `new`; presumably the caller is responsible for
// deleting it.
GIntervalsFetcher1D *GIntervalsBigSet1D::create_masked_copy(const set<int> &chromids_mask) const
{
	GIntervalsBigSet1D *masked = new GIntervalsBigSet1D();
	const size_t num_chroms = m_iu->get_chromkey().get_num_chroms();

	// identity of the set
	masked->m_intervset = m_intervset;
	masked->m_iu = m_iu;
	masked->m_orig_chrom2size = m_orig_chrom2size;

	// totals and processing flags
	masked->m_size = 0;
	masked->m_range = 0;
	masked->m_contains_overlaps = false;
	masked->m_do_sort = false;
	masked->m_do_unify_overlaps = false;

	// per-chromosome tables: zero everywhere until the mask is applied below
	masked->m_chrom2size.assign(num_chroms, 0);
	masked->m_chrom2unified_overlap_size.assign(num_chroms, 0);
	masked->m_chrom2unified_touching_size.assign(num_chroms, 0);
	masked->m_chrom2range.assign(num_chroms, 0);
	masked->m_chrom2unified_overlap_range.assign(num_chroms, 0);
	masked->m_user_chrom2size = &masked->m_chrom2size;

	// iteration state: positioned past the last chromosome
	masked->m_cur_chromid = masked->m_chrom2size.size();
	masked->m_iter_chrom = -1;
	masked->m_iter_index = 0;
	masked->m_iter_chrom_index = 0;
	masked->m_iinterval = masked->m_intervals.end();

	// copy the statistics of the chromosomes selected by the mask
	for (int chromid = 0; chromid < (int)num_chroms; ++chromid) {
		if (!chromids_mask.count(chromid))
			continue;

		masked->m_chrom2size[chromid] = m_chrom2size[chromid];
		masked->m_chrom2unified_overlap_size[chromid] = m_chrom2unified_overlap_size[chromid];
		masked->m_chrom2unified_touching_size[chromid] = m_chrom2unified_touching_size[chromid];
		masked->m_chrom2range[chromid] = m_chrom2range[chromid];
		masked->m_chrom2unified_overlap_range[chromid] = m_chrom2unified_overlap_range[chromid];

		masked->m_size += (size_t)m_chrom2size[chromid];
		masked->m_range += (size_t)m_chrom2range[chromid];

		// a chromosome contains overlaps iff unifying them changes its number of intervals
		masked->m_contains_overlaps |= m_chrom2size[chromid] != m_chrom2unified_overlap_size[chromid];
	}

	if (m_do_sort)
		masked->sort(m_compare);

	if (m_do_unify_overlaps)
		masked->unify_overlaps(m_unify_touching_intervals);

	return masked;
}

// Starts an iteration over all the intervals of the set: resets the iteration counters,
// drops the currently loaded intervals and loads the first chromosome that has intervals,
// positioning m_iinterval on its first interval.
//
// If no chromosome has intervals, m_cur_chromid ends up equal to the number of chromosomes.
void GIntervalsBigSet1D::begin_iter()
{
	m_iter_chrom = -1;
	m_iter_index = 0;
	m_iter_chrom_index = 0;
	m_intervals.clear();

	const int num_chroms = (int)m_chrom2size.size();

	for (m_cur_chromid = 0; m_cur_chromid < num_chroms; ++m_cur_chromid) {
		if (!get_num_intervals(m_cur_chromid))
			continue;

		load_chrom(m_cur_chromid);
		m_iinterval = m_intervals.begin();
		break;
	}
}

// Starts an iteration restricted to chromosome `chromid`.
//
// m_iter_index is set to the number of intervals on the chromosomes that precede
// `chromid`. If the chromosome has intervals, they are loaded and m_iinterval is positioned
// on the first of them; if it has none, m_intervals and m_iinterval are left as they were.
// If `chromid` is not a valid chromosome id, m_intervals is emptied and m_iinterval is set
// to its end.
void GIntervalsBigSet1D::begin_chrom_iter(int chromid)
{
	m_iter_chrom = chromid;
	m_iter_index = 0;
	m_iter_chrom_index = 0;

	const int num_chroms = (int)m_chrom2size.size();

	for (m_cur_chromid = 0; m_cur_chromid < num_chroms; ++m_cur_chromid) {
		if (m_cur_chromid != chromid) {
			// still before the requested chromosome: account for the intervals skipped
			m_iter_index += get_num_intervals(m_cur_chromid);
			continue;
		}

		if (get_num_intervals(chromid)) {
			load_chrom(chromid);
			m_iinterval = m_intervals.begin();
		}
		return;
	}

	// the requested chromosome does not exist
	m_intervals.clear();
	m_iinterval = m_intervals.end();
}

// Requests that the intervals be sorted with `compare`: the comparator is remembered and
// applied to every chromosome loaded from now on (see load_chrom()), and the currently
// loaded intervals, if any, are sorted right away.
void GIntervalsBigSet1D::sort(Compare_t compare)
{
	m_compare = compare;
	m_do_sort = true;

	if (m_intervals.size() > 0)
		m_intervals.sort(m_compare);
}

void GIntervalsBigSet1D::unify_overlaps(bool unify_touching_intervals)
{
	if (m_do_unify_overlaps && m_unify_touching_intervals == unify_touching_intervals) 
		return;

	m_do_unify_overlaps = true;
	m_unify_touching_intervals = unify_touching_intervals;

	m_size = 0;
	m_range = 0;
	if (m_unify_touching_intervals) {
		m_user_chrom2size = &m_chrom2unified_touching_size;
		for (vector<int64_t>::const_iterator isize = m_chrom2unified_touching_size.begin(); isize < m_chrom2unified_touching_size.end(); ++isize)
			m_size += *isize;
	} else {
		m_user_chrom2size = &m_chrom2unified_overlap_size;
		for (vector<int64_t>::const_iterator isize = m_chrom2unified_overlap_size.begin(); isize < m_chrom2unified_overlap_size.end(); ++isize) 
			m_size += *isize;
	}
	for (vector<int64_t>::const_iterator irange = m_chrom2unified_overlap_range.begin(); irange < m_chrom2unified_overlap_range.end(); ++irange) 
		m_range += *irange;

	if (m_intervals.size())
		m_intervals.unify_overlaps(m_unify_touching_intervals);
}

// Raises an OVERLAPPING_INTERVAL error, prefixed by `error_prefix`, if the statistics of
// the set say that it contains overlapping intervals. `chromkey` is not used here.
void GIntervalsBigSet1D::verify_no_overlaps(const GenomeChromKey &chromkey, const char *error_prefix) const
{
	if (!m_contains_overlaps)
		return;

	TGLError<GIntervalsFetcher1D>(OVERLAPPING_INTERVAL, "%sIntervals set %s contains overlapping intervals", error_prefix, m_intervset.c_str());
}

