In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Each input file contains one test case. For each case, the first line gives an integerN (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i]
is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, andf[i]
is the frequency ofc[i]
and is an integer no more than 1000. The next line gives a positive integerM (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i]
is the i
-th character and code[i]
is an non-empty string of no more than 63 '0's and '1's.
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Yes
Yes
No
No
tips: 在学了信息论与编码的时候你会知道, 哈夫曼编码方法得到的码并非唯一,不同的哈夫曼编码他们的质量不同, 应该选取码方差较小。一般将合并的放在‘上面’,这样可获得较小的码方差。(在此题中未处理)
#include <bits/stdc++.h>
using namespace std;
typedef int Status;
typedef struct HTNode
{
unsigned int weight;
unsigned int parent, lchild, rchild;
}HTNode, *HuffmanTree;
//typedef char* *HuffmanCode;
void Select(HuffmanTree &HT, int index, int &pos1, int &pos2)
{
int minvalue1 , minvalue2;
pos1 = pos2 = index;
minvalue1 = minvalue2 = 100000;
for(int i = 1; i < index; ++i)
if(HT[i].parent == 0)
{
if(HT[i].weight <= minvalue1)
{
minvalue2 = minvalue1;
minvalue1 = HT[i].weight;
pos2 = pos1;
pos1 = i;
}
else if(HT[i].weight <= minvalue2)
{
pos2 = i;
minvalue2 = HT[i].weight;
}
}
}
int HuffmanCoding(int *w, int n)
{
int m = 2 * n - 1;
HuffmanTree HT = (HTNode *)malloc(sizeof(HTNode) * (m + 1));
HuffmanTree p = HT + 1;
w++;
for(int i = 1; i <= n; ++i, ++w, ++p)
{
p->weight = *w;
p->parent = p->lchild = p->rchild = 0;
}
for(int i = n + 1; i <= m; ++i, ++p)
p->weight = p->parent = p->lchild = p->rchild = 0;
p = HT + n + 1;
for(int i = n + 1; i <= m; ++i, ++p)
{
int pos1, pos2;
Select(HT, i, pos1, pos2);
p->weight = HT[pos1].weight + HT[pos2].weight;
p->lchild = pos1, p->rchild = pos2;
HT[pos1].parent = HT[pos2].parent = i;
//printf("lchild %d and right %d µÄ parentÊÇ %d\n", pos2, pos1, i);
}
int pathnum[n + 1];
for(int i = 1; i <= n; ++i)
{
int length = 0;
for(int cpos = i, ppos = HT[i].parent; ppos != 0; cpos = ppos, ppos = HT[ppos].parent)
{
// printf("cposµÄ%d parentÊÇ %d\n", cpos, ppos);
length++;
}
// printf("length = %d\n", length);
pathnum[i] = length;
}
int minweight = 0;
for(int i = 1; i <= n; i++)
minweight += (pathnum[i] * HT[i].weight);
return minweight;
}
int isUncertain(char test[][65], int n)
{
for(int i = 0; i < n; ++i)
for(int j = i + 1; j < n; ++j)
{
int length = strlen(test[i]) > strlen(test[j]) ? strlen(test[j]):strlen(test[i]);
int k;
for(k = 0; k < length; ++k)
if(test[i][k] != test[j][k])
break;
if(k == length)
return 1;
}
return 0;
}
int GetWeight(char test[][65], int *w, int n)
{
int weight = 0;
for(int i = 0; i < n; ++i)
{
int length = strlen(test[i]);
weight += (length * w[i + 1]);
}
return weight;
}
int main()
{
int N, M, minwight, W[70];
char elem;
scanf("%d", &N);
getchar();
for(int i =1; i <= N; ++i)
if(i <= N - 1)
scanf("%c %d ", &elem, &W[i]);
else
scanf("%c %d", &elem, &W[i]);
int minweight = HuffmanCoding(W, N);
scanf("%d", &M);
for(int i = 0; i < M; ++i)
{
//HuffmanCode Ques = (HuffmanCode)malloc((N + 1) * sizeof(char *));
char Ques[65][65];
for(int i = 0; i < N; ++i)
{
getchar();
scanf("%c %s", &elem, Ques[i]);
//printf("****%s\n", Ques[i]);
}
if(isUncertain(Ques, N))
printf("No\n");
else
{
if(minweight == GetWeight(Ques, W, N))
printf("Yes\n");
else
printf("No\n");
}
}
}