#include <assert.h>
#include <stdlib.h>
#include <vector>

#include "typedef.h"
#include "safemalloc.h"


typedef struct {
	int len;
	std::vector<int> bitmask;
} Bitmask;

/*
int CanOverlap(const Bitmask* bm1, const Bitmask* bm2, int bm2_rel_pos) {
	int i;
	//printf("dd");
	if(bm2_rel_pos == 0) {
		//printf("1");
		for(i = min(bm1->len, bm2->len) - 1 ; i >= 0 ; i--) {
			if(bm1->bitmask[i] && bm2->bitmask[i]) {
				//printf("ee");
				return 0;
			}
		}
		//printf("ee");
		return 1;
	}
	else if(bm2_rel_pos < 0) return CanOverlap(bm2, bm1, -bm2_rel_pos);
	else {
		//printf("2");
		for(i = min(bm1->len, bm2->len + bm2_rel_pos) - 1 ; i >= bm2_rel_pos ; i--) {
			if(bm1->bitmask[i] && bm2->bitmask[i - bm2_rel_pos]) {
				//printf("ee");
				return 0;
			}
		}
		//printf("ee");
		return 1;
	}
}
*/

int CanOverlap(const Bitmask* bm1, const Bitmask* bm2, int bm2_rel_pos) {
	assert(sizeof(int) == 4); //This code should run correctly only if sizeof(int) == 4.

	//check if memand(bm1.bitmask + bm2_rel_pos, bm2.bitmask) == 0
	const int *p1, *p2;
	int common_remain;
	if(bm2_rel_pos < 0) return CanOverlap(bm2, bm1, -bm2_rel_pos);
	else if(bm2_rel_pos == 0) {
		common_remain = min(bm1->len, bm2->len);
		p1 = &(bm1->bitmask[0]);
		p2 = &(bm2->bitmask[0]);
quick_compare:
		while(common_remain >= 0x20) {
			if(((*p1) & (*p2)) != 0) return 0;
			p1++; p2++;
			common_remain -= 0x20;
		}
		if(common_remain == 0) return 1;
		else return ((*p1) & (*p2)) == 0;
	}
	else if(bm2_rel_pos > 0) {
		//endian-neutral code.
	    if(bm2_rel_pos >= bm1->len) return 1;
		int bit_offset = bm2_rel_pos & 0x1f;
		int inv_bit_offset = 32 - bit_offset;
		p1 = &(bm1->bitmask[0]) + (bm2_rel_pos >> 5);
		p2 = &(bm2->bitmask[0]);
		common_remain = min(bm1->len - bm2_rel_pos, bm2->len);
		if(bit_offset == 0) goto quick_compare;

		//int bm1_remain = bm1->len - bm2_rel_pos;
		//int bm2_remain = bm2->len;

		while(1) {
			if(common_remain < 0x20) return ((*p1 >> bit_offset) & (*p2) & ((~0) >> bit_offset)) == 0;
			if(((*p1 >> bit_offset) & (*p2) & ((~0) >> bit_offset)) != 0) return 0;
			common_remain -= inv_bit_offset;
			p1++;

			if(common_remain < 0x20) return ((*p1) & (*p2 >> inv_bit_offset) & ((~0) >> inv_bit_offset)) == 0;
			if(((*p1) & (*p2 >> inv_bit_offset) & ((~0) >> inv_bit_offset)) != 0) return 0;
			common_remain -= bit_offset;
			p2++;
		}
	}
	return 0; //To make compiler happy;
}


Bitmask* AssignBitmask(Chunk *chk) {
    ulong len = chk->len + 16;
    ulong i;
	Bitmask *bm = new Bitmask;
	bm->len = len;
	bm->bitmask.resize((len + 0x1f) >> 5);

	memset(&(bm->bitmask[0]), 0, sizeof(int) * ((len + 0x1f) >> 5));

	bm->bitmask[0] |= 0xff;
	for(i = 8 ; i < chk->len + 8 ; i++) {
		bm->bitmask[i >> 5] |= (chk->rep_table[i - 8] == 0) << (i & 0x1f);
	}
	for(  ; i < len ; i++) {
		bm->bitmask[i >> 5] |= 1 << (i & 0x1f);
	}
	return bm;
}

void selective_copy(Chunk *chk, Chunk *data, int offset) {
	ulong i;
	memcpy(chk->data + offset, data->type, 4);
	memcpy(chk->data + offset + 4, &data->len, 4);
	offset += 8;
	
	for(i = 0 ; i < data->len ; i++) {
		if(data->rep_table[i] == 1) continue;
		chk->data[i + offset] = data->data[i];
	}
}

Chunk* ConstructChunk(std::vector<Chunk*> chktb) {
	if(chktb.empty()) return NULL;

	if(0) {
		int totlen = 0;
		for(ulong i = 0 ; i < chktb.size() ; i++) {
			totlen += chktb[i]->len + 8;
		}
		Chunk *chk = GetBlankChunk("    ", totlen);
		int idx = 0;
		for(ulong i = 0 ; i < chktb.size() ; i++) {
			memcpy(chk->data + idx, chktb[i]->type, 4); idx += 4;
			memcpy(chk->data + idx, &chktb[i]->len, 4); idx += 4;
			memcpy(chk->data + idx, chktb[i]->data, chktb[i]->len); idx += chktb[i]->len;
		}
		return chk;
	}

	std::vector<int> offset;
	std::vector<Bitmask*> bm;
	ulong i, k;
	int j;
	int data_start = 0;
	int data_end = chktb[0]->len + 16;

	for(i = 0 ; i < chktb.size() ; i++) {
		Bitmask *bm1 = AssignBitmask(chktb[i]);
		bm.push_back(bm1);
	}
	
	offset.push_back(0);
	for(i = 1 ; i < chktb.size() ; i++) {
		printf("%d/%d - ", i, chktb.size());
		int offs = 0, minlen = 0x7fffffff;
		//go forward first.
		int prid_data_start, prid_data_end;
		for(j = data_start - 16 - chktb[i]->len ; j <= data_end ; j++) {
			//printf("%d/%d/%d\n", data_start - 16 - chktb[i]->len, j, data_end);
			for(k = 0 ; k < i ; k++) {
				if(CanOverlap(bm[i], bm[k], j - offset[k]) == 0) break;
			}
			if(k == i) {
				prid_data_start = min(offs, data_start);
				prid_data_end = max((int)(offs + chktb[i]->len + 16), data_end);
				if(prid_data_end - prid_data_start < minlen) {
					minlen = prid_data_end - prid_data_start;
					offs = j;
				}
			}
		}

		offset.push_back(offs);
		data_start = min(offs, data_start);
		data_end = max((int)(offs + chktb[i]->len + 16), data_end);
		printf("%d/%d\n", data_start, data_end);
	}

	for(i = 0 ; i < chktb.size() ; i++) offset[i] -= data_start;
	data_end -= data_start;
	data_start = 0;
	printf("%d/%d\n", data_start, data_end);
	{
		int minoff_idx;
		for(i = 0 ; i < chktb.size() ; i++) {
			if(offset[i] == 0) break;
		}
		//printf("%d/%d\n", i, chktb.size());
		minoff_idx = i;

		int *t = new int[chktb.size()];
		std::vector<Chunk*> chks;
		for(i = 0 ; i < chktb.size() ; i++) {
			t[i] = offset[(i + minoff_idx) % chktb.size()];
			chks.push_back(chktb[(i + minoff_idx) % chktb.size()]);
		}
		for(i = 0 ; i < chktb.size() ; i++) {
			offset[i] = t[i];
			chktb[i] = chks[i];
		}
		delete[] t;
	}

	offset.push_back(data_end);

	Chunk* chk = GetBlankChunk("    ", data_end);

	for(i = 0 ; i < chktb.size() ; i++)
		selective_copy(chk, chktb[i], offset[i]);

	for(i = 0 ; i < chktb.size() ; i++) {
		int jmps_offset = offset[i] + 8 + chktb[i]->len;
		memcpy(chk->data + jmps_offset, "JUMP", 4);
		*(ulong*)(chk->data + jmps_offset + 4) = offset[i + 1] - (jmps_offset + 8);
		delete bm[i];
	}
	return chk;
}