Sudoku!

#games

Project

小時候常玩的遊戲，但是一碰到需要假設數字很多的就會開始亂掉。
我的人工dfs只能塞大概4個堆疊而已。

一般我在手做數獨時都會用原本數字來限制某些格子的可能性，總有幾個可以刪到剩下一個，從1開始枚舉，用原本的數字開始限制，只要每次都可以寫上新的數字就不要進行假設。
基本上，在中等難度還可以用這套方式避開大量的假設。
但一旦到了難度偏高的時候，這種方法會因為數字開始變得稀疏而完全不管用。所以還是會有很多很多假設數字(腦中記憶體已用完)。

而現在我們有了電腦，只要可以設計出好的演算法就可以快速求解我花了兩天的數獨了。

窮舉
枚舉所有可能出現的數字。
對，就只是這樣而已。
但是也挺慢的。可以到好幾十秒。

窮舉+剪枝
當我們填上數字時，會導致那行、列與所在的大格能選的數字減少，也就是說原本每格都可以填上一些候補數，之後的格子則會受到先前的限制，當限制多到無法再某格填入任何數且整個數獨未完成的話，這一步即是錯的。
這個方法讓電腦可以不用枚舉一定不會有解的方向，速度必然會快上一點。

實作第一版:

每格都有一些候補數，當每次操作時都需要刪除其他格的候補數，或是拿到候補數，所以會需要能夠處理刪除、詢問的容器。
想了想就是C++ STL的set最適合。

// Sudoku-ver exhaustion.cpp
set<int> nineCell[4][4];
set<int> rSet[10];
set<int> cSet[10];

bool fix[10][10];
int table[10][10];

void SetXinCell(int r, int c, int x)
{
	nineCell[r / 3][c / 3].erase(x);
	rSet[r].erase(x);
	cSet[c].erase(x);
}

void RemoveXinCell(int r, int c, int x)
{
	nineCell[r / 3][c / 3].emplace(x);
	rSet[r].emplace(x);
	cSet[c].emplace(x);
}

#define Set(s) s.begin(), s.end()
set<int> findCandidate(int r, int c)
{
	set<int> tmp, res;
	set_intersection(Set(rSet[r]), Set(cSet[c]), inserter(tmp, tmp.begin()));
	set_intersection(Set(tmp), Set(nineCell[r / 3][c / 3]), inserter(res, res.end()));
	
	return res;
}

bool dfs(int r, int c)
{
	if (r == 9 && c == 0)
		return true;
	
	set<int> Candidates = findCandidate(r, c);
	
	for (auto it : Candidates)
	{
		SetXinCell(r, c, it);
		fix[r][c] = true;
		table[r][c] = it;
		
		int _r = r, _c = c;
		while (fix[_r][_c])
		{
			++_c;
			_r += _c / 9;
			_c %= 9;
		}
		
		if (dfs(_r, _c))
			return true;
		
		table[r][c] = 0;
		fix[r][c] = false;
		RemoveXinCell(r, c, it);
	}
	
	return false;
}

int main()
{
	for (int i = 0; i < 9; ++i)
		for (int j = 0; j < 9; ++j)
			for (int k = 1; k <= 9; ++k)
			{
				nineCell[i / 3][j / 3].emplace(k);
				rSet[i].emplace(k);
				cSet[j].emplace(k);
			}
	
	for (int i = 0; i < 9; ++i)
		for (int j = 0; j < 9; ++j)
		{
			cin >> table[i][j];
			if (table[i][j])
			{
				fix[i][j] = true;
				
				SetXinCell(i, j, table[i][j]);
			}
		}
	
	int r = 0, c = 0;
	while (fix[r][c])
	{
		++c;
		r += c / 9;
		c %= 9;
	}
	
	dfs(r, c);
	
	cout << '\n' << "answer:" << '\n';
	
	for (int i = 0; i < 9; ++i)
	{
		for (int j = 0; j < 9; ++j)
		{
			cout << table[i][j] << ' ';
			if (!((j + 1) % 3))
				cout << ' ';
		}
		cout << '\n';
		if (!((i + 1) % 3))
			cout << '\n';
	}
	return 0;
}

拿了十個Expert難度的數獨來測試:

: 3474 ms

: 4300 ms

: 1960 ms

: 10590 ms

: 2762 ms

: 181 ms

: 2041 ms

:1494 ms

:466 ms

:10205 ms

平均: 3747 ms
標準差: 3529

可以發現這種方式非常不穩定，原因是當我們填入數字時，如果他是對的，那我們在這一層就不會往下試數，反之。
而我們用的set是有序的，會由小到大枚舉，所以如果前面的數字正確答案是比較後面的數字，例如9，就會花費非常多時間。
為了不要讓他是有序的，我們可以通過random戳候補數，這樣可以讓期望時間變小。

同上面數據的結果:
第一次:
:3058 ms
:3995 ms
:3805 ms
:2411 ms
:2863 ms
:124 ms
:70 ms
:299 ms
:38 ms
:8382 ms

平均: 2504 ms
標準差: 2479

第二次:
:2914 ms
:3947 ms
:3681 ms
:2469 ms
:2766 ms
:122 ms
:72 ms
:296 ms
:35 ms
:8172 ms

平均: 2447 ms
標準差: 2416

變得比較穩定，總體來看也變快了。

不過這還可以優化，C++ STL的set底層通常採用紅黑樹實現
雖然期望複雜度很好，但是STL的封裝導致常數頗大。

每行、列、宮都是一個集合，且最多只有9個數字。
有沒有想到什麼東西阿，位元壓縮!

每個行、列、宮都可以壓縮成一個整數，第i位=1代表數字i是可選的，反之。
所以對於$(r, c)$的候補數就是集合$S_r\bigcap S_c\bigcap Cells_{r,c}$。
加入一個數字與刪除一個數字也不過是在三個集合中做xor操作罷了。
這種作法常數小了很多，且記憶體也非常小，使用int16_t的話，也不過幾KB。
可謂兼具時間與空間的作法。

// Sudoku-ver bit.cpp
int table[11][11];
bool fix[11][11];

int nineCell[4][4];
int rSet[10], cSet[10];

int findCandidate(int r, int c)
{
	return nineCell[r / 3][c / 3] & rSet[r] & cSet[c];
}

void SetXinCell(int r, int c, int x)
{
	rSet[r] ^= (1 << x);
	cSet[c] ^= (1 << x);
	nineCell[r / 3][c / 3] ^= (1 << x);
}

bool dfs(int r, int c)
{
	if (r == 9 && c == 0)
		return true;

	int candidate = findCandidate(r, c) & ((1 << 10) - 1) ^ 1;

	for (int i = 1; candidate; ++i)
	{
		if (candidate & (1 << i))
		{
			SetXinCell(r, c, i);
			fix[r][c] = true;
			table[r][c] = i;
			candidate ^= (1 << i);

			int _r = r, _c = c;
			while (fix[_r][_c])
			{
				++_c;
				_r += _c / 9;
				_c %= 9;
			}

			if (dfs(_r, _c))
				return true;

			table[r][c] = 0;
			fix[r][c] = false;
			SetXinCell(r, c, i);
		}
	}

	return false;
}

int main()
{
	memset(fix, false, sizeof(fix));
	memset(nineCell, -1, sizeof(nineCell));
	memset(rSet, -1, sizeof(rSet));
	memset(cSet, -1, sizeof(cSet));

	for (int i = 0; i < 9; ++i)
		for (int j = 0; j < 9; ++j)
		{
			cin >> table[i][j];
			if (table[i][j])
			{
				fix[i][j] = true;

				SetXinCell(i, j, table[i][j]);
			}
		}

	double START, END;
	START = clock();

	int r = 0, c = 0;
	
	while (fix[r][c])
	{
		++c;
		r += c / 9;
		c %= 9;
	}

	dfs(r, c);

	cout << "\nanswer:" << '\n';

	for (int i = 0; i < 9; ++i)
	{
		for (int j = 0; j < 9; ++j)
		{
			cout << table[i][j] << ' ';
			if (!((j + 1) % 3))
				cout << ' ';
		}
		cout << '\n';
		if (!((i + 1) % 3))
			cout << '\n';
	}

	END = clock();

	cout << "execution time: " << (END - START) << " ms" << '\n';
	return 0;
}

同樣依上面的測試數據:
:17 ms
:23 ms
:25 ms
:41 ms
:15 ms
:11 ms
:18 ms
:11 ms
:6 ms
:43 ms

平均: 21 ms
標準差: 12

這速度差了不只一個檔次，可見常數對於程式的影響多大。

DLX algorithm
網路上很多人都說是解數獨的最快算法。

詳細實作方式會另外寫一篇文章解釋。

測試結果:
:5 ms
:4 ms
:6 ms
:7 ms
:5 ms
:5 ms
:6 ms
:4 ms
:7 ms
:4 ms

平均: 5 ms
標準差: 1

看來真的是最快數獨算法了。
但是空間的使用也比bit版本多很多，大概10倍左右，是一個空間換取時間的優秀策略。

延伸話題:

數獨的終盤:
2005年由Bertram Felgenhauer和Frazer Jarvis計算出合法數獨的終盤共有6,670,903,752,021,072,936,960個，而如果將等價盤去掉，則有5,472,730,538個
而每個終盤又可以做出$\sum^{64}_{i=1} C^{81}_{i}$個題目，數字已經過於龐大，以至於對於我們沒甚麼意義(反正就是一輩子都寫不完)。
數獨的最少提示數:
已經被證明最少要有17個提示數才能使數獨是唯一解。
由Gary McGuire, Bastian Tugemann, Gilles Civario給出的證明: https://arxiv.org/pdf/1201.0749.pdf
各種腦動大開的數獨:
對角線數獨

鋸齒數獨

連體數獨

killer數獨