David A. Huffman
1951年,哈夫曼和他在MIT信息论的同学需要选择是完成学期报告还是期末考试。导师Robert M. Fano给他们的学期报告的题目是,寻找最有效的二进制编码。由于无法证明哪个已有编码是最有效的,哈夫曼放弃对已有编码的研究,转向新的探索,最终发现了基于有序频率二叉树编码的想法,并很快证明了这个方法是最有效的。由于这个算法,学生终于青出于蓝,超过了他那曾经和信息论创立者香农共同研究过类似编码的导师。哈夫曼使用自底向上的方法构建二叉树,避免了次优算法Shannon-Fano编码的最大弊端──自顶向下构建树。
1952年,David A. Huffman在麻省理工攻读博士时发表了《一种构建极小多余编码的方法》,一文,它一般就叫做Huffman编码。A Method for the Construction of Minimum-Redundancy Codeshttps://www.ias.ac.in/article/fulltext/reso/011/02/0091-0099
Huffman在1952年根据香农(Shannon)在1948年和范若(Fano)在1949年阐述的这种编码思想提出了一种不定长编码的方法,也称霍夫曼(Huffman)编码。霍夫曼编码的基本方法是先对图像数据扫描一遍,计算出各种像素出现的概率,按概率的大小指定不同长度的唯一码字,由此得到一张该图像的霍夫曼码表。编码后的图像数据记录的是每个像素的码字,而码字与实际像素值的对应关系记录在码表中。
赫夫曼编码是可变字长编码(VLC)的一种。 Huffman于1952年提出一种编码方法,该方法完全依据字符出现概率来构造异字头的平均长 度最短的码字,有时称之为最佳编码,一般就称Huffman编码。下面引证一个定理,该定理保证了按字符出现概率分配码长,可使平均码长最短。
using System;
using System.IO;
using System.Text;
using System.Linq;
using System.Collections;
using System.Collections.Generic;
namespace Legalsoft.Truffer.Algorithm
{
///
/// 哈夫曼编码的压缩与解压缩
///
public class HuffmanCompressor
{
private HuffmanNode oRootHuffmanNode { get; set; } = null;
private List
private List
{
List
Value.ToList().ForEach(m => oHuffmanNodes[m].Weight++);
oHuffmanNodes = oHuffmanNodes
.Where(m => (m.Weight > 0))
.OrderBy(m => (m.Weight))
.ThenBy(m => (m.Value))
.ToList();
oHuffmanNodes = UpdateNodeParents(oHuffmanNodes);
oRootHuffmanNode = oHuffmanNodes[0];
oHuffmanNodes.Clear();
SortNodes(oRootHuffmanNode, oHuffmanNodes);
return oHuffmanNodes;
}
public void Compress(string FileName)
{
FileInfo oFileInfo = new FileInfo(FileName);
if (oFileInfo.Exists == true)
{
string sFileContents = "";
using (StreamReader oStreamReader = new StreamReader(File.OpenRead(FileName)))
{
sFileContents = oStreamReader.ReadToEnd();
}
List
oValueHuffmanNodes = oHuffmanNodes
.Where(m => (m.Value.HasValue == true))
.OrderBy(m => (m.BinaryWord))
.ToList();
Dictionary
foreach (HuffmanNode oHuffmanNode in oValueHuffmanNodes)
{
oCharToBinaryWordDictionary.Add(oHuffmanNode.Value.Value, oHuffmanNode.BinaryWord);
}
StringBuilder oStringBuilder = new StringBuilder();
List
for (int i = 0; i < sFileContents.Length; i++)
{
string sWord = "";
oStringBuilder.Append(oCharToBinaryWordDictionary[sFileContents[i]]);
while (oStringBuilder.Length >= 8)
{
sWord = oStringBuilder.ToString().Substring(0, 8);
oStringBuilder.Remove(0, sWord.Length);
}
if (String.IsNullOrEmpty(sWord) == false)
{
oByteList.Add(Convert.ToByte(sWord, 2));
}
}
if (oStringBuilder.Length > 0)
{
string sWord = oStringBuilder.ToString();
if (String.IsNullOrEmpty(sWord) == false)
{
oByteList.Add(Convert.ToByte(sWord, 2));
}
}
string sCompressedFileName = Path.Combine(oFileInfo.Directory.FullName, String.Format("{0}.compressed", oFileInfo.Name.Replace(oFileInfo.Extension, "")));
if (File.Exists(sCompressedFileName) == true)
{
File.Delete(sCompressedFileName);
}
using (FileStream oFileStream = File.OpenWrite(sCompressedFileName))
{
oFileStream.Write(oByteList.ToArray(), 0, oByteList.Count);
}
}
}
public void Decompress(string FileName)
{
FileInfo oFileInfo = new FileInfo(FileName);
if (oFileInfo.Exists == true)
{
string sCompressedFileName = String.Format("{0}.compressed", oFileInfo.FullName.Replace(oFileInfo.Extension, ""));
byte[] oBuffer = null;
using (FileStream oFileStream = File.OpenRead(sCompressedFileName))
{
oBuffer = new byte[oFileStream.Length];
oFileStream.Read(oBuffer, 0, oBuffer.Length);
}
HuffmanNode oZeroHuffmanNode = oRootHuffmanNode;
while (oZeroHuffmanNode.Left != null)
{
oZeroHuffmanNode = oZeroHuffmanNode.Left;
}
HuffmanNode oCurrentHuffmanNode = null;
StringBuilder oStringBuilder = new StringBuilder();
for (int i = 0; i < oBuffer.Length; i++)
{
string sBinaryWord = "";
byte oByte = oBuffer[i];
if (oByte == 0)
{
sBinaryWord = oZeroHuffmanNode.BinaryWord;
}
else
{
sBinaryWord = Convert.ToString(oByte, 2);
}
if ((sBinaryWord.Length < 8) && (i < (oBuffer.Length - 1)))
{
StringBuilder oBinaryStringBuilder = new StringBuilder(sBinaryWord);
while (oBinaryStringBuilder.Length < 8)
{
oBinaryStringBuilder.Insert(0, "0");
}
sBinaryWord = oBinaryStringBuilder.ToString();
}
for (int j = 0; j < sBinaryWord.Length; j++)
{
char cValue = sBinaryWord[j];
if (oCurrentHuffmanNode == null)
{
oCurrentHuffmanNode = oRootHuffmanNode;
}
if (cValue == '0')
{
oCurrentHuffmanNode = oCurrentHuffmanNode.Left;
}
else
{
oCurrentHuffmanNode = oCurrentHuffmanNode.Right;
}
if ((oCurrentHuffmanNode.Left == null) && (oCurrentHuffmanNode.Right == null))
{
oStringBuilder.Append(oCurrentHuffmanNode.Value.Value);
oCurrentHuffmanNode = null;
}
}
}
string sUncompressedFileName = Path.Combine(oFileInfo.Directory.FullName, String.Format("{0}.uncompressed", oFileInfo.Name.Replace(oFileInfo.Extension, "")));
if (File.Exists(sUncompressedFileName) == true)
{
File.Delete(sUncompressedFileName);
}
using (StreamWriter oStreamWriter = new StreamWriter(File.OpenWrite(sUncompressedFileName)))
{
oStreamWriter.Write(oStringBuilder.ToString());
}
}
}
private static List
{
List
for (int i = Char.MinValue; i < Char.MaxValue; i++)
{
oGetInitialNodeList.Add(new HuffmanNode((char)(i)));
}
return oGetInitialNodeList;
}
private static void SortNodes(HuffmanNode Node, List
{
if (Nodes.Contains(Node) == false)
{
Nodes.Add(Node);
}
if (Node.Left != null)
{
SortNodes(Node.Left, Nodes);
}
if (Node.Right != null)
{
SortNodes(Node.Right, Nodes);
}
}
private static List
{
while (Nodes.Count > 1)
{
int iOperations = (Nodes.Count / 2);
for (int iOperation = 0, i = 0, j = 1; iOperation < iOperations; iOperation++, i += 2, j += 2)
{
if (j < Nodes.Count)
{
HuffmanNode oParentHuffmanNode = new HuffmanNode(Nodes[i], Nodes[j]);
Nodes.Add(oParentHuffmanNode);
Nodes[i] = null;
Nodes[j] = null;
}
}
Nodes = Nodes
.Where(m => (m != null))
.OrderBy(m => (m.Weight))
.ToList();
}
return Nodes;
}
}
public class HuffmanNode
{
private string sBinaryWord { get; set; } = "";
private bool bIsLeftNode { get; set; } = false;
private bool bIsRightNode { get; set; } = false;
private HuffmanNode oLeft { get; set; } = null;
private HuffmanNode oParent { get; set; } = null;
private HuffmanNode oRight { get; set; } = null;
private char? cValue { get; set; } = ' ';
private int iWeight { get; set; } = 0;
public HuffmanNode()
{
}
public HuffmanNode(char Value)
{
cValue = Value;
}
public HuffmanNode(HuffmanNode Left, HuffmanNode Right)
{
oLeft = Left;
oLeft.oParent = this;
oLeft.bIsLeftNode = true;
oRight = Right;
oRight.oParent = this;
oRight.bIsRightNode = true;
iWeight = (oLeft.Weight + oRight.Weight);
}
public string BinaryWord
{
get
{
string sReturnValue = "";
if (String.IsNullOrEmpty(sBinaryWord) == true)
{
StringBuilder oStringBuilder = new StringBuilder();
HuffmanNode oHuffmanNode = this;
while (oHuffmanNode != null)
{
if (oHuffmanNode.bIsLeftNode == true)
{
oStringBuilder.Insert(0, "0");
}
if (oHuffmanNode.bIsRightNode == true)
{
oStringBuilder.Insert(0, "1");
}
oHuffmanNode = oHuffmanNode.oParent;
}
sReturnValue = oStringBuilder.ToString();
sBinaryWord = sReturnValue;
}
else
{
sReturnValue = sBinaryWord;
}
return sReturnValue;
}
}
public HuffmanNode Left
{
get
{
return oLeft;
}
}
public HuffmanNode Parent
{
get
{
return oParent;
}
}
public HuffmanNode Right
{
get
{
return oRight;
}
}
public char? Value
{
get
{
return cValue;
}
}
public int Weight
{
get
{
return iWeight;
}
set
{
iWeight = value;
}
}
public static int BinaryStringToInt32(string Value)
{
int iBinaryStringToInt32 = 0;
for (int i = (Value.Length - 1), j = 0; i >= 0; i--, j++)
{
iBinaryStringToInt32 += ((Value[j] == '0' ? 0 : 1) * (int)(Math.Pow(2, i)));
}
return iBinaryStringToInt32;
}
public override string ToString()
{
StringBuilder sb = new StringBuilder();
if (cValue.HasValue == true)
{
sb.AppendFormat("'{0}' ({1}) - {2} ({3})", cValue.Value, iWeight, BinaryWord, BinaryStringToInt32(BinaryWord));
}
else
{
if ((oLeft != null) && (oRight != null))
{
if ((oLeft.Value.HasValue == true) && (oRight.Value.HasValue == true))
{
sb.AppendFormat("{0} + {1} ({2})", oLeft.Value, oRight.Value, iWeight);
}
else
{
sb.AppendFormat("{0}, {1} - ({2})", oLeft, oRight, iWeight);
}
}
else
{
sb.Append(iWeight);
}
}
return sb.ToString();
}
}
}
- using System;
- using System.IO;
- using System.Text;
- using System.Linq;
- using System.Collections;
- using System.Collections.Generic;
-
- namespace Legalsoft.Truffer.Algorithm
- {
- ///
- /// 哈夫曼编码的压缩与解压缩
- ///
- public class HuffmanCompressor
- {
- private HuffmanNode oRootHuffmanNode { get; set; } = null;
- private List
oValueHuffmanNodes { get; set; } = null; -
- private List
BuildBinaryTree(string Value) - {
- List
oHuffmanNodes = GetInitialNodeList(); -
- Value.ToList().ForEach(m => oHuffmanNodes[m].Weight++);
-
- oHuffmanNodes = oHuffmanNodes
- .Where(m => (m.Weight > 0))
- .OrderBy(m => (m.Weight))
- .ThenBy(m => (m.Value))
- .ToList();
-
- oHuffmanNodes = UpdateNodeParents(oHuffmanNodes);
-
- oRootHuffmanNode = oHuffmanNodes[0];
- oHuffmanNodes.Clear();
-
- SortNodes(oRootHuffmanNode, oHuffmanNodes);
-
- return oHuffmanNodes;
- }
-
- public void Compress(string FileName)
- {
- FileInfo oFileInfo = new FileInfo(FileName);
-
- if (oFileInfo.Exists == true)
- {
- string sFileContents = "";
-
- using (StreamReader oStreamReader = new StreamReader(File.OpenRead(FileName)))
- {
- sFileContents = oStreamReader.ReadToEnd();
- }
-
- List
oHuffmanNodes = BuildBinaryTree(sFileContents); -
- oValueHuffmanNodes = oHuffmanNodes
- .Where(m => (m.Value.HasValue == true))
- .OrderBy(m => (m.BinaryWord))
- .ToList();
-
- Dictionary<char, string> oCharToBinaryWordDictionary = new Dictionary<char, string>();
- foreach (HuffmanNode oHuffmanNode in oValueHuffmanNodes)
- {
- oCharToBinaryWordDictionary.Add(oHuffmanNode.Value.Value, oHuffmanNode.BinaryWord);
- }
-
- StringBuilder oStringBuilder = new StringBuilder();
- List<byte> oByteList = new List<byte>();
- for (int i = 0; i < sFileContents.Length; i++)
- {
- string sWord = "";
-
- oStringBuilder.Append(oCharToBinaryWordDictionary[sFileContents[i]]);
-
- while (oStringBuilder.Length >= 8)
- {
- sWord = oStringBuilder.ToString().Substring(0, 8);
-
- oStringBuilder.Remove(0, sWord.Length);
- }
-
- if (String.IsNullOrEmpty(sWord) == false)
- {
- oByteList.Add(Convert.ToByte(sWord, 2));
- }
- }
-
- if (oStringBuilder.Length > 0)
- {
- string sWord = oStringBuilder.ToString();
-
- if (String.IsNullOrEmpty(sWord) == false)
- {
- oByteList.Add(Convert.ToByte(sWord, 2));
- }
- }
-
- string sCompressedFileName = Path.Combine(oFileInfo.Directory.FullName, String.Format("{0}.compressed", oFileInfo.Name.Replace(oFileInfo.Extension, "")));
- if (File.Exists(sCompressedFileName) == true)
- {
- File.Delete(sCompressedFileName);
- }
-
- using (FileStream oFileStream = File.OpenWrite(sCompressedFileName))
- {
- oFileStream.Write(oByteList.ToArray(), 0, oByteList.Count);
- }
- }
- }
-
- public void Decompress(string FileName)
- {
- FileInfo oFileInfo = new FileInfo(FileName);
-
- if (oFileInfo.Exists == true)
- {
- string sCompressedFileName = String.Format("{0}.compressed", oFileInfo.FullName.Replace(oFileInfo.Extension, ""));
-
- byte[] oBuffer = null;
- using (FileStream oFileStream = File.OpenRead(sCompressedFileName))
- {
- oBuffer = new byte[oFileStream.Length];
- oFileStream.Read(oBuffer, 0, oBuffer.Length);
- }
-
- HuffmanNode oZeroHuffmanNode = oRootHuffmanNode;
- while (oZeroHuffmanNode.Left != null)
- {
- oZeroHuffmanNode = oZeroHuffmanNode.Left;
- }
-
- HuffmanNode oCurrentHuffmanNode = null;
- StringBuilder oStringBuilder = new StringBuilder();
-
- for (int i = 0; i < oBuffer.Length; i++)
- {
- string sBinaryWord = "";
- byte oByte = oBuffer[i];
-
- if (oByte == 0)
- {
- sBinaryWord = oZeroHuffmanNode.BinaryWord;
- }
- else
- {
- sBinaryWord = Convert.ToString(oByte, 2);
- }
-
- if ((sBinaryWord.Length < 8) && (i < (oBuffer.Length - 1)))
- {
- StringBuilder oBinaryStringBuilder = new StringBuilder(sBinaryWord);
- while (oBinaryStringBuilder.Length < 8)
- {
- oBinaryStringBuilder.Insert(0, "0");
- }
-
- sBinaryWord = oBinaryStringBuilder.ToString();
- }
-
- for (int j = 0; j < sBinaryWord.Length; j++)
- {
- char cValue = sBinaryWord[j];
-
- if (oCurrentHuffmanNode == null)
- {
- oCurrentHuffmanNode = oRootHuffmanNode;
- }
-
- if (cValue == '0')
- {
- oCurrentHuffmanNode = oCurrentHuffmanNode.Left;
- }
- else
- {
- oCurrentHuffmanNode = oCurrentHuffmanNode.Right;
- }
-
- if ((oCurrentHuffmanNode.Left == null) && (oCurrentHuffmanNode.Right == null))
- {
- oStringBuilder.Append(oCurrentHuffmanNode.Value.Value);
- oCurrentHuffmanNode = null;
- }
- }
- }
-
- string sUncompressedFileName = Path.Combine(oFileInfo.Directory.FullName, String.Format("{0}.uncompressed", oFileInfo.Name.Replace(oFileInfo.Extension, "")));
-
- if (File.Exists(sUncompressedFileName) == true)
- {
- File.Delete(sUncompressedFileName);
- }
-
- using (StreamWriter oStreamWriter = new StreamWriter(File.OpenWrite(sUncompressedFileName)))
- {
- oStreamWriter.Write(oStringBuilder.ToString());
- }
- }
- }
-
- private static List
GetInitialNodeList() - {
- List
oGetInitialNodeList = new List(); -
- for (int i = Char.MinValue; i < Char.MaxValue; i++)
- {
- oGetInitialNodeList.Add(new HuffmanNode((char)(i)));
- }
-
- return oGetInitialNodeList;
- }
-
- private static void SortNodes(HuffmanNode Node, List
Nodes ) - {
- if (Nodes.Contains(Node) == false)
- {
- Nodes.Add(Node);
- }
-
- if (Node.Left != null)
- {
- SortNodes(Node.Left, Nodes);
- }
-
- if (Node.Right != null)
- {
- SortNodes(Node.Right, Nodes);
- }
- }
-
- private static List
UpdateNodeParents(List Nodes ) - {
- while (Nodes.Count > 1)
- {
- int iOperations = (Nodes.Count / 2);
- for (int iOperation = 0, i = 0, j = 1; iOperation < iOperations; iOperation++, i += 2, j += 2)
- {
- if (j < Nodes.Count)
- {
- HuffmanNode oParentHuffmanNode = new HuffmanNode(Nodes[i], Nodes[j]);
- Nodes.Add(oParentHuffmanNode);
-
- Nodes[i] = null;
- Nodes[j] = null;
- }
- }
-
- Nodes = Nodes
- .Where(m => (m != null))
- .OrderBy(m => (m.Weight))
- .ToList();
- }
-
- return Nodes;
- }
- }
-
- public class HuffmanNode
- {
- private string sBinaryWord { get; set; } = "";
- private bool bIsLeftNode { get; set; } = false;
- private bool bIsRightNode { get; set; } = false;
- private HuffmanNode oLeft { get; set; } = null;
- private HuffmanNode oParent { get; set; } = null;
- private HuffmanNode oRight { get; set; } = null;
- private char? cValue { get; set; } = ' ';
- private int iWeight { get; set; } = 0;
-
- public HuffmanNode()
- {
- }
-
- public HuffmanNode(char Value)
- {
- cValue = Value;
- }
-
- public HuffmanNode(HuffmanNode Left, HuffmanNode Right)
- {
- oLeft = Left;
- oLeft.oParent = this;
- oLeft.bIsLeftNode = true;
-
- oRight = Right;
- oRight.oParent = this;
- oRight.bIsRightNode = true;
-
- iWeight = (oLeft.Weight + oRight.Weight);
- }
-
- public string BinaryWord
- {
- get
- {
- string sReturnValue = "";
-
- if (String.IsNullOrEmpty(sBinaryWord) == true)
- {
- StringBuilder oStringBuilder = new StringBuilder();
-
- HuffmanNode oHuffmanNode = this;
-
- while (oHuffmanNode != null)
- {
- if (oHuffmanNode.bIsLeftNode == true)
- {
- oStringBuilder.Insert(0, "0");
- }
-
- if (oHuffmanNode.bIsRightNode == true)
- {
- oStringBuilder.Insert(0, "1");
- }
-
- oHuffmanNode = oHuffmanNode.oParent;
- }
-
- sReturnValue = oStringBuilder.ToString();
- sBinaryWord = sReturnValue;
- }
- else
- {
- sReturnValue = sBinaryWord;
- }
-
- return sReturnValue;
- }
- }
-
- public HuffmanNode Left
- {
- get
- {
- return oLeft;
- }
- }
-
- public HuffmanNode Parent
- {
- get
- {
- return oParent;
- }
- }
-
- public HuffmanNode Right
- {
- get
- {
- return oRight;
- }
- }
-
- public char? Value
- {
- get
- {
- return cValue;
- }
- }
-
- public int Weight
- {
- get
- {
- return iWeight;
- }
- set
- {
- iWeight = value;
- }
- }
-
- public static int BinaryStringToInt32(string Value)
- {
- int iBinaryStringToInt32 = 0;
-
- for (int i = (Value.Length - 1), j = 0; i >= 0; i--, j++)
- {
- iBinaryStringToInt32 += ((Value[j] == '0' ? 0 : 1) * (int)(Math.Pow(2, i)));
- }
-
- return iBinaryStringToInt32;
- }
-
- public override string ToString()
- {
- StringBuilder sb = new StringBuilder();
-
- if (cValue.HasValue == true)
- {
- sb.AppendFormat("'{0}' ({1}) - {2} ({3})", cValue.Value, iWeight, BinaryWord, BinaryStringToInt32(BinaryWord));
- }
- else
- {
- if ((oLeft != null) && (oRight != null))
- {
- if ((oLeft.Value.HasValue == true) && (oRight.Value.HasValue == true))
- {
- sb.AppendFormat("{0} + {1} ({2})", oLeft.Value, oRight.Value, iWeight);
- }
- else
- {
- sb.AppendFormat("{0}, {1} - ({2})", oLeft, oRight, iWeight);
- }
- }
- else
- {
- sb.Append(iWeight);
- }
- }
-
- return sb.ToString();
- }
- }
- }