自己用JAVA手动实现的一个压缩类,不过效率是硬伤,还有一点小小的BUG,不过测试一些小的文件还是能通过的,一般在500kb左右,大了的话效率就有问题了,暂时还不知道如何解决、、、根据赫夫曼编码实现的、、、
import java.io.*;
public class Compression {
public static void main(String[] args) {
long startTime = System.currentTimeMillis(); // 获取开始时间
comp(); //压缩
long middleTime = System.currentTimeMillis(); // 获取中间时间
deComp(); //解压
long endTime = System.currentTimeMillis(); // 获取结束时间
p("");
p("压缩时间: " + (middleTime - startTime) + "ms");
p("解压时间: " + (endTime - middleTime) + "ms");
p("程序运行时间: " + (endTime - startTime) + "ms");
}
public static void comp() {
p("压缩文件开始、、、、、、");
File fin = null, fout = null;
int[] a = new int[255 + 1]; // 统计源数据中出现的次数,以此编码
int fileLength = 0;
int[] source; // 从文件读取的源数据
char[] cin; // 从文件读取的数据提取出不重复的数据
int countIn = 0;
// 读取文件
try {
fin = new File("F:\\Language\\Java\\Practice\\PracticeCompression\\yting10.txt");
FileInputStream in = new FileInputStream(fin);
fileLength = (int) fin.length();
source = new int[fileLength];
int tempSrc, lenTemp = 0;
while ((tempSrc = in.read()) != -1) {
source[lenTemp++] = tempSrc;
}
for (int i = 0; i < source.length; i++) {
a[source[i]]++;
}
// 统计a中不重复的有多少
for (int i = 0; i < a.length; i++) {
if (a[i] != 0) {
countIn++;
}
}
// 将a中不重复的复制到cin中去
cin = new char[countIn];
for (int i = 0, k = 0; i < a.length; i++) {
if (a[i] != 0) {
cin[k++] = (char) i;
}
}
// 构造赫夫曼树huff
// if(n<=1) return ;
int i, j1, m, n;
n = countIn;
m = 2 * n - 1;
HuffmanTree[] huff = new HuffmanTree[m + 1];
// 将1~m号单元中的双亲,左孩子,右孩子的下标都初始化为0
for (i = 1; i <= m; i++) {
huff[i] = new HuffmanTree();
huff[i].parent = 0;
huff[i].lchild = 0;
huff[i].rchild = 0;
}
// 输入
// 输入前n个单元中叶子结点的权值
// 结点的权值;
j1 = 0;
for (i = 1; i <= n; i++) {
huff[i].weight = a[cin[j1]];
j1++;
}
// 初始化工作结束,下面开始创建赫夫曼树
int s1, s2, t, min1 = -1, min2 = -1, j;
for (i = n + 1; i <= m; i++) {
// 在huff[j] (1<=j<=i-1)中选择两个双亲域为0且权值最小的结点
// 并返回他们在huff中的序号s1和s2
// //
// Select(huff,i-1,s1,s2);
min1 = 0;
min2 = -1;
// 为min1赋初值
for (j = 1; j <= i - 1; j++) {
if (huff[j].parent == 0) {
min1 = j;
break;
}
}
// 找出s1
for (j = 1; j <= i - 1; j++) {
if (huff[j].parent == 0) {
if (huff[min1].weight > huff[j].weight) {
min1 = j;
}
}
}
// 为min2赋初值
for (j = 1; j <= i - 1; j++) {
if (huff[j].parent == 0 && j != min1) {
min2 = j;
break;
}
}
// 找出s2
for (j = 1; j <= i - 1; j++) {
if (huff[j].parent == 0 && j != min1) {
if (huff[min2].weight > huff[j].weight) {
min2 = j;
}
}
}
s1 = min1;
s2 = min2;
// //
// 得到新结点i,从森林中删除s1,s2,将s1和s2的双亲域由0改为i
huff[s1].parent = i;
huff[s2].parent = i;
// s1,s2分别作为i的左右孩子
if (s1 > s2) {
t = s1;
s1 = s2;
s2 = t;
}
huff[i].lchild = s1;
huff[i].rchild = s2;
// i的权值为左右孩子权值之和
huff[i].weight = huff[s1].weight + huff[s2].weight;
}
// ///
char[][] HuffmanCode;
// 从叶子到根逆向求每个字符的赫夫曼编码。存储在编码表HuffmanCode中
char[] cd;
int itemp, start, c, ftemp;
HuffmanCode = new char[n + 1][]; //分配n+1个字符编码的空间
cd = new char[n - 1]; //分配临时存放编码的动态数组空间
for (itemp = 1; itemp <= n; itemp++) { // 逐个字符求赫夫曼编码
start = n - 1; // start开始指向最后,即编码结束符位置
c = itemp;
ftemp = huff[itemp].parent; // f指向结点c的双亲
while (ftemp != 0) { // 从叶子结点开始向上回溯,直到根结点
--start; // 回溯一次start向前指一个位置
if (huff[ftemp].lchild == c)
cd[start] = '0'; // 结点c是f的左孩子,则生成代码0
else
cd[start] = '1'; // 结点c是f的右孩子,则生成代码1
c = ftemp; // 继续向上回溯
ftemp = huff[ftemp].parent;
} // 求出第i个字符的编码
HuffmanCode[itemp] = new char[n - start - 1]; //为第i个字符编码分配空间
//将求得的编码从临时空间cd复制到HC的当前行中
for (int k1 = 0, k2 = 0; k1 < cd.length; k1++) {
if (cd[k1] == '0' || cd[k1] == '1') {
HuffmanCode[itemp][k2] = cd[k1];
k2++;
}
}
cd = new char[n - 1]; // 分配新空间
}
// //
char[] huffmanCodeCopy;
int lenHuff = 0;
// 求出lenhuff
for (int it = 0, k; it < source.length; it++) {
k = 0;
for (int jt = 0; jt < cin.length; jt++) {
if (source[it] == cin[jt]) {
while (HuffmanCode[jt + 1].length > k) {
k++;
lenHuff++;
}
}
}
}
// 为huffmanCodeCopy初始化
huffmanCodeCopy = new char[lenHuff];
lenHuff = 0;
for (int it = 0, k; it < source.length; it++) {
k = 0;
for (int jt = 0; jt < cin.length; jt++) {
if (source[it] == cin[jt]) {
while (HuffmanCode[jt + 1].length > k) {
huffmanCodeCopy[lenHuff] = HuffmanCode[jt + 1][k];
k++;
lenHuff++;
}
}
}
}
// 缩位
int tf;
tf = huffmanCodeCopy.length / 8 + 1;
char[] finallyCode = new char[tf];
char[] temp = new char[8];
String strTem;
int tp, fLen = 0;
int it, ij;
for (it = 0, ij = 0; it < huffmanCodeCopy.length; it++) {
if (ij < 8) {
if (it < huffmanCodeCopy.length) {
temp[ij] = huffmanCodeCopy[it];
ij++;
}
} else if (ij >= 8) {
strTem = new String(temp);
tp = Integer.parseInt(strTem, 2);
ij = 0;
temp = new char[8];
finallyCode[fLen++] = (char) tp;
it--;
}
}
/*for (int item = 0; item < temp.length; item++) {
pw(temp[item] + " ");
}*/
if ((temp[0] == '0' || temp[0] == '1') && (temp[7] !='0' && temp[7] !='1')) {
//p("aaaaaaaa");
for (int item = 0; item < 8; item++) {
if (temp[item] != '0' && temp[item] != '1') {
temp[item] = '1';
}
}
strTem = new String(temp);
tp = Integer.parseInt(strTem, 2);
finallyCode[fLen++] = (char) tp;
}
// //
// 将压缩好的东西存到文件中去前的准备
// 先转换HuffmanCode
fout = new File("F:\\Language\\Java\\Practice\\PracticeCompression\\yting_compression.txt");
FileOutputStream fo = new FileOutputStream(fout);
DataOutputStream dp = new DataOutputStream(fo);
int huffmanCompLen = 0;
int[] huffmanComp = new int[HuffmanCode.length - 1];
String strTemp, flags = "1";
StringBuffer strBuf;
for (int item = 1; item < HuffmanCode.length; item++) {
strTemp = new String(HuffmanCode[item]);
strBuf = new StringBuffer(strTemp);
strBuf.insert(0, '1');
strTemp = strBuf.toString();
huffmanComp[huffmanCompLen] = (int) Integer.parseInt(strTemp, 2);
huffmanCompLen++;
}
// 开始压缩到文件中去
/*
* 编码方式 cinLen huffmanCompLen cin huffmanComp finallyCode
*/
// 写入cinLen
dp.writeInt(cin.length);
// 写入huffmanCompLen
dp.writeInt(huffmanComp.length);
// cin
// 写入huffmanC,也就是cin,但是cin是char[]的
for (int item = 0; item < cin.length; item++) {
dp.writeChar(cin[item]);
}
// 写入huffmanComp
for (int item = 0; item < huffmanComp.length; item++) {
dp.writeInt(huffmanComp[item]);
}
// 写入finallyCode
for (int item = 0; item < finallyCode.length; item++) {
dp.write(finallyCode[item]);
}
dp.close();
p("压缩文件结束、、、、、、");
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public static void deComp() {
p("解压文件开始、、、、、、");
int cinLen, huffmanCompLen, huffmanStrLen, finallyCodeLen;
char[] cin;
int[] huffmanCompTemp, huffmanComp;
int[] finallyCode;
String[] huffmanStr;
DataInputStream di;
try {
File f = new File(
"F:\\Language\\Java\\Practice\\PracticeCompression\\yting_compression.txt");
FileInputStream fi = new FileInputStream(f);
di = new DataInputStream(fi);
// 开始读取
// 读取cinLen
cinLen = di.readInt();
// 读取huffmanCompLen
huffmanCompLen = di.readInt();
// 初始化cin,huffmanCompTemp,huffmanComp
cin = new char[cinLen];
huffmanCompTemp = new int[huffmanCompLen];
huffmanComp = new int[huffmanCompLen];
huffmanStr = new String[huffmanCompLen];
// 读取cin
for (int item = 0; item < cinLen; item++) {
cin[item] = di.readChar();
}
// 读取huffmanComp
for (int item = 0; item < huffmanCompLen; item++) {
huffmanCompTemp[item] = di.readInt();
}
// 初始化finallyCode并读取finallyCode
finallyCodeLen = 0;
finallyCode = new int[((int) f.length() - 4 - 4 - cinLen * 2 - huffmanCompLen * 4)];
int temp, flag = 1;
while ((temp = di.read()) != -1) {
finallyCode[finallyCodeLen] = temp;
finallyCodeLen++;
flag++;
if (flag == 10) {
flag = 1;
}
}
// 将huffmanCompTemp转换成二进制,因为huffmanCompTemp是整数
String tempChange;
huffmanStrLen = 0;
for (int item = 0; item < huffmanCompTemp.length; item++) {
tempChange = Integer.toBinaryString(huffmanCompTemp[item]);
// 将前面的1去除
tempChange = tempChange.substring(1);
huffmanStr[huffmanStrLen] = tempChange;
huffmanStrLen++;
}
// 将finallyCode装换成二进制,因为它的每个字节都是整数(0~255)
StringBuffer huffmanCode = new StringBuffer();
StringBuffer strtem = new StringBuffer();
for (int item = 0; item < finallyCode.length; item++) {
strtem.append(Integer.toBinaryString(finallyCode[item]));
while (strtem.length() < 8) {
strtem.insert(0, "0");
}
huffmanCode.append(strtem);
strtem = new StringBuffer();
}
String tempCode;
// /
// 将huffmanstr跟cin排序,以提高效率
String strt = null;
char strc;
for(int item=1; item<huffmanStr.length;item++){
for(int jtem=0; jtem<huffmanStr.length-item; jtem++){
if(huffmanStr[jtem].length() > huffmanStr[jtem + 1].length()){
strt = huffmanStr[jtem];
huffmanStr[jtem] = huffmanStr[jtem + 1];
huffmanStr[jtem + 1] = strt; strt = null;
strc = cin[jtem];
cin[jtem] = cin[jtem+1];
cin[jtem+1] = strc;
}
}
}
/*for(int item=0; item<huffmanStr.length; item++){
p(huffmanStr[item]);
}*/
// /
// 开始将huffmanCode中的二进制数据东西还原成字节了
File f2 = new File("F:\\Language\\Java\\Practice\\PracticeCompression\\yting_finally.txt");
FileOutputStream fo = new FileOutputStream(f2);
DataOutputStream dop = new DataOutputStream(fo);
p("开始将解压的文件存入、、、、、、");
for (int item = 1, flags = 0; item < huffmanCode.length(); item++) {
tempCode = huffmanCode.substring(flags, item);
//char[] tempC = tempCode.toCharArray();
// 跟huffmanStr比较得出cin,也就是压缩前的字节数据,
for (int jtem = 0; jtem < huffmanStr.length; jtem++) {
//pw(Integer.parseInt(huffmanStr[jtem], 2) + "---");
//p(Integer.parseInt(tempCode, 2));
if(huffmanStr[jtem].length() == tempCode.length()){
if (huffmanStr[jtem].equalsIgnoreCase(tempCode)) {
// 存储到dop所指向的文件中去
dop.write(cin[jtem]);
flags = item;
//p(huffmanStr[jtem].length() + "-" + tempCode.length());
break;
}
}
}
}
dop.close();
p("解压文件结束、、、、、、");
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
public static void pw(Object o) {
System.out.print(o);
}
public static void p(Object o) {
System.out.println(o);
}
}
class HuffmanTree {
int weight;
int parent, lchild, rchild;
}