Hatena::Grouptopcoder

nodchipのTopCoder日記 このページをアンテナに追加 RSSフィード

 | 

2010-07-08

2010 TCO Marathon Round 2 11:25 2010 TCO Marathon Round 2 - nodchipのTopCoder日記 を含むブックマーク はてなブックマーク - 2010 TCO Marathon Round 2 - nodchipのTopCoder日記 2010 TCO Marathon Round 2 - nodchipのTopCoder日記 のブックマークコメント

CellularAutomaton

初期盤面としてRxCの盤面上に白マスと黒マスが与えられる。セルオートマトンをKターン適用したとき、黒マスの数が多くなるように初期盤面を変更せよ。マスの色は最大N個まで反転できる。

初見

  • セルオートマトンってなんだっけ・・・
  • とりあえずVisualizer眺めてみる
  • あぁ、周囲のマスの状況で次のターンのマスの状態が変わるあれか
  • 方針さっぱりたたん
  • とりあえず焼きなましてみる?

序盤

  • だめだ。全く話にならない。
  • 多分もっと高速化しなければならないんだろうなぁ
  • ・・・
  • マスの変化の条件が簡単だからSSEとか使えないかなぁ
  • 使えそう・・・
  • マスの状態をcharで持たせれば、周囲の黒マスの数は_mm_add_epi8()を使って16マス同時にカウント出来る
  • セルオートマトンの適用は_mm_cmpeq_epi8()でマスクを作って、対応する状態を_mm_or_si128()で足し合わせれはおk
  • 実際にやってみた
  • 結構スコア上がった!
  • この調子でどんどん高速化して行ってみよう

前半

中盤

  • それでも・・・
  • 速 さ が 足 り な い !
  • ・・・
  • いま J☆I☆T という電波を受信した気がしたのだが気のせいだろうか?
  • いや、きっと気のせいじゃないはず!
  • よしJITの実装だ!
  • JITの適用箇所はセルオートマトンの適用部分。関数一回呼び出すために無駄にpush/popしている。
  • この部分を関数呼び出し無しで連続で実行出来るように改造してみる
  • まずはコンパイルされた機械語コピペ
  • 機械語を確保した領域にぺたぺた
  • 最後にret命令を書き込んで終わり
  • 動くかな・・・
  • ・・・
  • 動かない・・・orz
  • 何がまずいんだろう?
  • @rofi先生によるとVirtualAlloc/mmapで実行可能フラグを立てた領域に書き込まないと動かないらしい
  • やってみた
  • ・・・
  • 動いた!!!
  • 速度1.5倍アップ!49.75ptsをマーク!キタコレ

終盤

  • もう微調整ぐらいしかやることが無い・・・
  • '-'の場合は処理をしなくて良いのでadd命令を削ったりまとめたり
  • プロファイル取って遅いところをSSE化してみたり

感想

  • JIT楽しかった。またやろう。

#define REP(i,n) for(int i=0;i<(int)n;++i)
#define ALL(c) (c).begin(), (c).end()
#ifdef _MSC_VER
#define ALIGNED(x) __declspec(align(16)) x
#else
#define ALIGNED(x) x __attribute__ ((aligned(16)))
#endif

#ifdef _MSC_VER
#include <Windows.h>
ll getTime() {
    return GetTickCount();
}
#else
#include <sys/time.h>
ll getTime() {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    ll result =  tv.tv_sec * 1000LL + tv.tv_usec / 1000LL;
    //cerr << result << endl;
    return result;
}
#endif

#define ENABLE_JIT
//#undef  ENABLE_JIT

// http://www.jstatsoft.org/v08/i14/xorshift.pdf
unsigned long xor128() {     // 周期は 2^128-1
    static unsigned long x=123456789,y=362436069,z=521288629,w=88675123;
    unsigned long t;
    t=(x^(x<<11));x=y;y=z;z=w; return( w=(w^(w>>19))^(t^(t>>8)) );
}

static int dr[] = { 0, 1, 0, -1, 1, 1, -1, -1 };
static int dc[] = { 1, 0, -1, 0, 1, -1, -1, 1 };
static vector<string> initialGrid;
static int N;
static int K;
static int R;
static int C;

struct Position {
    int row, column;
    Position(int row, int column) : row(row), column(column) { }
};

//TODO:状態を表す型/構造体を作成する
class STATE {
public:
    //TODO:コンストラクタに必要な引数を追加する
    explicit STATE();
    void next();
    void prev();
    double energy();
    const vector<string> getGrid() const {
        vector<string> g;
        REP(r, R) {
            ostringstream oss;
            REP(c, C) {
                oss << (int)grid[r + 1][c];
            }
            g.push_back(oss.str());
        }
        return g;
    }
private:
	void selectPanel(int& row, int& column) const;

	ALIGNED(unsigned char grid[128][128]);
    vector<Position> positions;
    bool changed[128][128];
    ALIGNED(unsigned char lastGridEnd[128][128]);
    int lastRow;
    int lastColumn;
    int lastType;
};

class SimulatedAnnealing {
public:
    //TODO:コンストラクタに必要な引数を追加する
    SimulatedAnnealing();
    STATE run();
private:
    static const int TIME_LIMIT;
    double calculateProbability(double score, double scoreNeighbor, double temperature);
};

//TODO:コンストラクタに必要な引数を追加する
SimulatedAnnealing::SimulatedAnnealing() {
}

//TODO:計算時間をミリ秒単位で設定する
const int SimulatedAnnealing::TIME_LIMIT = 9900;

STATE SimulatedAnnealing::run() {
    const ll timeStart = getTime();
    const ll timeEnd = timeStart + TIME_LIMIT;
    //cerr << timeStart << " " << timeEnd << endl;
    STATE state;
    double energy = state.energy();
    STATE result = state;
    double minEnergy = energy;
    int counter = 0;
    ll timeCurrent;
    while ((timeCurrent = getTime()) < timeEnd){
		REP(loop, 300) {
			//cerr << "current:" << timeCurrent << " end:" << timeEnd << endl;
			state.next();
			const double energyNeighbor = state.energy();
			const double random = xor128() * 0.00000000023283064365386962890625;
			const double probability = calculateProbability(energy, energyNeighbor, (double)(timeEnd - timeCurrent) / (double)(timeEnd - timeStart) + 1e-8);
			if (random < probability){
				//Accept
				if (minEnergy > energyNeighbor) {
#ifdef _MSC_VER
					fprintf(stderr, "minEnergy updated! %.5lf -> %.5lf\n", minEnergy, energyNeighbor);
#endif
					minEnergy = energyNeighbor;
					result = state;
				}
				//fprintf(stderr, "Accepted %.5lf -> %.5lf : minEnergy=%.5lf\n", energy, energyNeighbor, minEnergy);
				energy = energyNeighbor;
			} else {
				//Decline
				state.prev();
				//fprintf(stderr, "Declined\n");
			}
			++counter;
		}
    }
    fprintf(stderr, "counter:%d minEnergy:%.5lf\n", counter, minEnergy);
    return result;
}

double SimulatedAnnealing::calculateProbability(double energy, double energyNeighbor, double temperature) {
	if (energyNeighbor < energy) {
		return 1;
	} else {
		const double result = exp((energy - energyNeighbor) * 0.5) * temperature;
		//fprintf(stderr, "%lf -> %lf * %lf = %lf\n", energy, energyNeighbor, temperature, result);
		return result;
	}
}

//TODO:初期状態を作る関数を記述する
STATE::STATE() {
    REP(r, R) {
        REP(c, C) {
            grid[r + 1][c] = initialGrid[r][c] - '0';
        }
    }
    memset(changed, 0, sizeof(changed));
}

//TODO:遷移後の状態を作る関数を記述する
enum {
	TYPE_ADD,
	TYPE_REMOVE,
	TYPE_SWAP,
	NUMBER_OF_TYPE
};

void STATE::selectPanel(int& bestRow, int& bestColumn) const {
	// 周囲5x5の白マスの多いパネルを選ぶ
	int bestCounter = INT_MAX;
	//const int numberOfLoops = 3;
	const int numberOfLoops = rand() % 10 + 1;
	for (int loop = 0; loop < numberOfLoops; ++loop) {
		int row, column;
		do {
			row = xor128() % R * 1;
			column = xor128() % C;
		} while (changed[row][column]);

		int counter = 0;
		for (int dr = -2; dr <= 2; ++dr) {
			for (int dc = -2; dc <= 2; ++dc) {
				counter += lastGridEnd[(row + dr + R) % R][(column + dc + C) % C];
			}
		}

		if (bestCounter > counter) {
			bestCounter = counter;
			bestRow = row;
			bestColumn = column;
		}
	}
}

void STATE::next() {
	lastType = xor128() % NUMBER_OF_TYPE;
	//if (positions.empty()) {
	//	lastType = TYPE_ADD;
	//} else if (positions.size() == N) {
	//	lastType = TYPE_REMOVE;
	//}
	if (positions.empty()) {
		lastType = TYPE_ADD;
	} else if (lastType == TYPE_ADD && positions.size() == N) {
		lastType = xor128() % 2 + 1;
	}

	switch (lastType) {
	case TYPE_ADD:
		{
			selectPanel(lastRow, lastColumn);

			positions.push_back(Position(lastRow, lastColumn));
			changed[lastRow][lastColumn] = true;
			grid[lastRow][lastColumn] = 1 - grid[lastRow][lastColumn];
		}
		break;
	case TYPE_REMOVE:
		{
			const int removeIndex = xor128() % positions.size();
			swap(positions[removeIndex], positions.back());
			lastRow = positions.back().row;
			lastColumn = positions.back().column;
			positions.pop_back();
			changed[lastRow][lastColumn] = false;
			grid[lastRow][lastColumn] = 1 - grid[lastRow][lastColumn];
		}
		break;
	case TYPE_SWAP:
		{
			selectPanel(lastRow, lastColumn);
			changed[lastRow][lastColumn] = true;
			grid[lastRow][lastColumn] ^= 1;

			const int removeIndex = xor128() % positions.size();
			swap(positions[removeIndex], positions.back());
			swap(positions.back().row, lastRow);
			swap(positions.back().column, lastColumn);

			changed[lastRow][lastColumn] = false;
			grid[lastRow][lastColumn] ^= 1;
		}
		break;
	}
}

//TODO:遷移前の状態を作る関数を記述する
//     高々一つ前の状態までに戻れれば良い
void STATE::prev() {
	switch (lastType) {
	case TYPE_ADD:
		{
			positions.pop_back();
			changed[lastRow][lastColumn] = false;
			grid[lastRow][lastColumn] ^= 1;
		}
		break;
	case TYPE_REMOVE:
		{
			positions.push_back(Position(lastRow, lastColumn));
			changed[lastRow][lastColumn] = true;
			grid[lastRow][lastColumn] ^= 1;
		}
		break;
	case TYPE_SWAP:
		{
			changed[lastRow][lastColumn] = true;
			grid[lastRow][lastColumn] ^= 1;

			swap(positions.back().row, lastRow);
			swap(positions.back().column, lastColumn);

			changed[lastRow][lastColumn] = false;
			grid[lastRow][lastColumn] ^= 1;
		}
		break;
	}
}

const __m128i ONE = _mm_set1_epi8(1);
const __m128i CONSTANTS[] = {
    _mm_set1_epi8(0),
    _mm_set1_epi8(1),
    _mm_set1_epi8(2),
    _mm_set1_epi8(3),
    _mm_set1_epi8(4),
    _mm_set1_epi8(5),
    _mm_set1_epi8(6),
    _mm_set1_epi8(7),
    _mm_set1_epi8(8),
};

#ifdef ENABLE_JIT
static const int JIT_CODE_SIZE = 1024;
void *JIT_CODE;
#else
 //xmm3 : ONE
 //xmm4 : ZERO
 //xmm5 : counter
 //xmm6 : back
 //xmm7 : front
void xmmRule0() {
#ifdef _MSC_VER
    __asm {
        movdqa    xmm0, [eax]    // xmm0 = constant
        pcmpeqb    xmm0, xmm5                    // xmm0 = _mm_cmpeq_epi8(xmm0, back)
        pand    xmm0, xmm6                    // xmm0 = _mm_and_si128(xmm0, xmm6)
        por        xmm7, xmm0                    // xmm7 = _mm_or_si128(xmm7, xmm0)
        add        eax, 0x10
    }
#else
    asm __volatile__ (
        "movdqa        (%%eax), %%xmm0\n\t"
        "pcmpeqb    %%xmm5, %%xmm0\n\t"
        "pand        %%xmm6, %%xmm0\n\t"
        "por        %%xmm0, %%xmm7\n\t"
        "add        $0x10, %%eax\n\t"
        : : :
    );
#endif
}
void xmmRule1() {
#ifdef _MSC_VER
    __asm {
        movdqa    xmm0, [eax]    // xmm0 = constant
        pcmpeqb    xmm0, xmm5    // xmm0 = _mm_cmpeq_epi8(xmm0, back)
        pand    xmm0, xmm3    // xmm0 = _mm_and_si128(xmm0, one)
        por        xmm7, xmm0    // xmm7 = _mm_or_si128(xmm7, xmm0)
        add        eax, 0x10
    }
#else
    asm __volatile__ (
        "movdqa        (%%eax), %%xmm0\n\t"
        "pcmpeqb    %%xmm5, %%xmm0\n\t"
        "pand        %%xmm3, %%xmm0\n\t"
        "por        %%xmm0, %%xmm7\n\t"
        "add        $0x10, %%eax\n\t"
        : : :
    );
#endif
}
void xmmRule2() {
#ifdef _MSC_VER
    __asm {
        add        eax, 0x10
    }
#else
    asm __volatile__ (
        "add    $0x10, %%eax\n\t"
        : : :
    );
#endif
}
void xmmRule3() {
#ifdef _MSC_VER
    __asm {
        movdqa    xmm0, [eax]    // xmm0 = constant
        pcmpeqb    xmm0, xmm5    // xmm0 = _mm_cmpeq_epi8(xmm0, back)
        movdqa    xmm1, xmm3    // xmm0 = _mm_and_si128(xmm0, one)
        psubsb    xmm1, xmm6    // xmm1 = _mm_subs_epi8(xmm1, xmm6)
        pand    xmm0, xmm1    // xmm0 = _mm_and_si128(xmm0, one)
        por        xmm7, xmm0    // xmm7 = _mm_or_si128(xmm7, xmm0)
        add        eax, 0x10
    }
#else
    asm __volatile__ (
        "movdqa        (%%eax), %%xmm0\n\t"
        "pcmpeqb    %%xmm5, %%xmm0\n\t"
        "movdqa        %%xmm3, %%xmm1\n\t"
        "psubsb        %%xmm6, %%xmm1\n\t"
        "pand        %%xmm1, %%xmm0\n\t"
        "por        %%xmm0, %%xmm7\n\t"
        "add        $0x10, %%eax\n\t"
        : : :
    );
#endif
}

typedef void (*xmmRuleFunction)();
xmmRuleFunction xmmRuleFunctions[9];
#endif

//TODO:状態のエネルギーを計算する関数を記述する
//     状態はエネルギーの低い所に遷移する点に注意する
ALIGNED(char buffer[2][128][128]);
double STATE::energy() {
    const __m128i* constant = CONSTANTS;

#ifdef ENABLE_JIT
	const void* jitCode = JIT_CODE;
#endif
    
	//cerr << "numberOfChanges:" << positions.size() << endl;

    for (int r = 1; r <= R; ++r) {
        const __m128i* pointerGrid = (__m128i*)&grid[r][0];
        __m128i* pointerBack = (__m128i*)&buffer[1][r][0];

        for (int c = 0; c < C; c += 16, ++pointerBack, ++pointerGrid) {
            __m128i xmm0 = _mm_load_si128(pointerGrid);
            _mm_store_si128(pointerBack, xmm0);
        }
    }

    int front = 0;
    int back = 1;
	
#ifdef _MSC_VER
    __asm {
        pxor    xmm4, xmm4
        movdqa    xmm3, ONE
    }
#else
    __asm__ __volatile__ (
        "pxor    %%xmm4, %%xmm4\n\t"
        "movdqa    (%0), %%xmm3"
        : : "r"(&ONE) :
    );
#endif

	REP(k, K) {
        for (int c = 0; c < C; c += 16) {
            __m128i temp;
            temp = _mm_load_si128((__m128i*)&buffer[back][R][c]);
            _mm_store_si128((__m128i*)&buffer[back][0][c], temp);
            temp = _mm_load_si128((__m128i*)&buffer[back][1][c]);
            _mm_store_si128((__m128i*)&buffer[back][R + 1][c], temp);
        }
        for (int r = 0; r <= R + 1; ++r) {
            buffer[back][r][-1] = buffer[back][r][C - 1];
            buffer[back][r][C] = buffer[back][r][0];
        }

        for (int r = 1; r <= R; ++r) {
            char* centerBack = &buffer[back][r][0];
            char* centerFront = &buffer[front][r][0];
			char* centerBackEnd = &buffer[back][r][C];

			for (; centerBack < centerBackEnd; centerBack += 16, centerFront += 16) {
				//xmm3 : ONE
				//xmm4 : ZERO
				//xmm5 : counter
				//xmm6 : back
				//xmm7 : front
				//fprintf(stderr, "before asm\n");

#ifdef _MSC_VER
                __asm {
                    push	eax
                    mov		eax, centerBack
                    movdqu	xmm5, [eax-129]
                    movdqu	xmm0, [eax-128]
                    paddb	xmm5, xmm0
                    movdqu	xmm0, [eax-127]
                    paddb	xmm5, xmm0
                    movdqu	xmm0, [eax-1]
                    paddb	xmm5, xmm0
                    movdqu	xmm0, [eax+1]
                    paddb	xmm5, xmm0
                    movdqu	xmm0, [eax+127]
                    paddb	xmm5, xmm0
                    movdqu	xmm0, [eax+128]
                    paddb	xmm5, xmm0
                    movdqu	xmm0, [eax+129]
                    paddb	xmm5, xmm0

                    movdqu	xmm6, [eax]
                    pxor	xmm7, xmm7
					mov		eax, constant
                }
#else
                __asm__ __volatile__ (
                    "push	%%eax\n\t"
                    "mov    %0, %%eax\n\t"
                    "movdqu	-129(%%eax), %%xmm5\n\t"
                    "movdqu	-128(%%eax), %%xmm0\n\t"
                    "paddb	%%xmm0, %%xmm5\n\t"
                    "movdqu	-127(%%eax), %%xmm0\n\t"
                    "paddb	%%xmm0, %%xmm5\n\t"
                    "movdqu	-1(%%eax), %%xmm0\n\t"
                    "paddb	%%xmm0, %%xmm5\n\t"
                    "movdqu	1(%%eax), %%xmm0\n\t"
                    "paddb    %%xmm0, %%xmm5\n\t"
                    "movdqu	127(%%eax), %%xmm0\n\t"
                    "paddb	%%xmm0, %%xmm5\n\t"
                    "movdqu	128(%%eax), %%xmm0\n\t"
                    "paddb	%%xmm0, %%xmm5\n\t"
                    "movdqu	129(%%eax), %%xmm0\n\t"
                    "paddb	%%xmm0, %%xmm5\n\t"

                    "movdqu	(%%eax), %%xmm6\n\t"
                    "pxor	%%xmm7, %%xmm7\n\t"
                    "mov	%1, %%eax\n\t"
                    : : "b" (centerBack), "c" (constant) :
                );
#endif

				//fprintf(stderr, "after 1st asm\n");

#ifdef ENABLE_JIT
				((void(*)())JIT_CODE)();
#else
				xmmRuleFunctions[0]();
				xmmRuleFunctions[1]();
				xmmRuleFunctions[2]();
				xmmRuleFunctions[3]();
				xmmRuleFunctions[4]();
				xmmRuleFunctions[5]();
				xmmRuleFunctions[6]();
				xmmRuleFunctions[7]();
				xmmRuleFunctions[8]();
#endif
				
				//fprintf(stderr, "after 2nd asm\n");

#ifdef _MSC_VER
                __asm {
                    mov		eax, centerFront
                    movdqu	[eax], xmm7
                    pop		eax
                }
#else
                __asm__ __volatile__ (
                    "mov	%0, %%eax\n\t"
                    "movdqu	%%xmm7, (%%eax)\n\t"
                    "pop	%%eax\n\t"
                    : : "b" (centerFront) :
                );
#endif

				//fprintf(stderr, "after last asm\n");
            }
        }

        swap(front, back);
    }

    int counter = 0;
    for (int c = 0; c < C;) {
        __m128i sum = _mm_load_si128((__m128i*)&buffer[back][1][c]);
        for (int r = 2; r <= R; ++r) {
            const __m128i values = _mm_load_si128((__m128i*)&buffer[back][r][c]);
            sum = _mm_add_epi8(sum, values);
        }

        ALIGNED(unsigned char valueBuffer[16]);

        _mm_store_si128((__m128i*)valueBuffer, sum);

        const int length = min(16, C - c);
        counter += accumulate(valueBuffer, valueBuffer + length, 0);
        c += length;
    }

    for (int r = 1; r <= R; ++r) {
        const __m128i* pointerSrc = (__m128i*)&buffer[back][r][0];
        __m128i* pointerDst = (__m128i*)&lastGridEnd[r][0];

        for (int c = 0; c < C; c += 16, ++pointerSrc, ++pointerDst) {
            __m128i xmm0 = _mm_load_si128(pointerSrc);
            _mm_store_si128(pointerDst, xmm0);
        }
    }

    return -counter;
}

#ifdef ENABLE_JIT
static const unsigned char jitRule0[] = {
    0x66, 0x0F, 0x6F, 0x00,
    0x66, 0x0F, 0x74, 0xC5,
    0x66, 0x0F, 0xDB, 0xC6,
    0x66, 0x0F, 0xEB, 0xF8,
};
static const unsigned char jitRule1[] = {
    0x66, 0x0F, 0x6F, 0x00,
    0x66, 0x0F, 0x74, 0xC5,
    0x66, 0x0F, 0xDB, 0xC3,
    0x66, 0x0F, 0xEB, 0xF8,
};
static const unsigned char jitRule3[] = {
    0x66, 0x0F, 0x6F, 0x00,
    0x66, 0x0F, 0x74, 0xC5,
    0x66, 0x0F, 0x6F, 0xCB,
    0x66, 0x0F, 0xE8, 0xCE,
    0x66, 0x0F, 0xDB, 0xC1,
    0x66, 0x0F, 0xEB, 0xF8,
};
#endif

#ifdef _MSC_VER
#include <Windows.h>
#else
#include <sys/mman.h>
#include <errno.h>
#include <stdio.h>
#endif

struct CellularAutomaton {
    vector <string> configure(vector <string> grid, string rules, int N, int K) {
        ::initialGrid = grid;

#ifdef ENABLE_JIT
#ifdef _MSC_VER
		JIT_CODE = VirtualAlloc(NULL, JIT_CODE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
		JIT_CODE = mmap(0, JIT_CODE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
#endif

		int addTable[9];
		int lastValid = -1;
		for (int i = 8; i >= 0; --i) {
			addTable[i] = lastValid;
			if (rules[i] != '-') {
				lastValid = i;
			}
		}

		unsigned char* jitCode = (unsigned char*)JIT_CODE;

		if (rules[0] == '-') {
			if (addTable[0] != -1) {
				*jitCode++ = 0x83;
				*jitCode++ = 0xC0;
				*jitCode++ = addTable[0] * 0x10;
			}
		}

		REP(i, 9) {
            switch (rules[i]){
            case '=':
				memcpy(jitCode, jitRule0, sizeof(jitRule0));
				jitCode += sizeof(jitRule0);
				if (addTable[i] != -1) {
					*jitCode++ = 0x83;
					*jitCode++ = 0xC0;
					*jitCode++ = (addTable[i] - i) * 0x10;
				}
                break;
            case '+':
				memcpy(jitCode, jitRule1, sizeof(jitRule1));
				jitCode += sizeof(jitRule1);
				if (addTable[i] != -1) {
					*jitCode++ = 0x83;
					*jitCode++ = 0xC0;
					*jitCode++ = (addTable[i] - i) * 0x10;
				}
                break;
            case 'X':
				memcpy(jitCode, jitRule3, sizeof(jitRule3));
				jitCode += sizeof(jitRule3);
				if (addTable[i] != -1) {
					*jitCode++ = 0x83;
					*jitCode++ = 0xC0;
					*jitCode++ = (addTable[i] - i) * 0x10;
				}
                break;
            }
        }
		*jitCode++ = 0xC3;
#else
		REP(i, 9) {
            switch (rules[i]){
            case '=':
				xmmRuleFunctions[i] = xmmRule0;
                break;
            case '+':
				xmmRuleFunctions[i] = xmmRule1;
                break;
            case '-':
				xmmRuleFunctions[i] = xmmRule2;
                break;
            case 'X':
				xmmRuleFunctions[i] = xmmRule3;
                break;
            }
        }
#endif

		::N = N;
        ::K = K;
        ::R = grid.size();
        ::C = grid[0].size();

        STATE state = SimulatedAnnealing().run();
        cerr << "size:" << R << "x" << C << "=" << R * C << endl;
        const double energy = state.energy();
        cerr << energy << endl;
        cerr << "energy:" << (energy / (R * C)) << endl;

#ifdef ENABLE_JIT
#ifdef _MSC_VER
		VirtualFree(JIT_CODE, 0, MEM_DECOMMIT);
		JIT_CODE = NULL;
#else
		munmap(JIT_CODE, JIT_CODE_SIZE);
		JIT_CODE = NULL;
#endif
#endif

		return state.getGrid();
    }
};

トラックバック - http://topcoder.g.hatena.ne.jp/nodchip/20100708
 |