huffman哈夫曼编码的erlang实现

原创
2019/12/02 20:11
阅读数 114

用途

  • 1.发协议,通信
  • 2.文件压缩 - 无损压缩

原理

  • 哈夫曼就是为了获得最少编码量代替最多字符串,并且不冲突,系统不会误判而产生的。
  • 出现得越多的字母,他的编码越短 ,出现频率越少的字母,他的编码越长。

案例

  • erlang:term_to_binary(Data, [{compressed, 9}]). 可以将7k的数据压缩为1k左右

左起字串不冲突原则

如不能出现读码读到01还有长编码的字母为011的情况,如果短编码为一个长编码的左起字串,这就是冲突,意思是说读到当前位置已经能确定是什么字母时不能因为再读取一位或几位这个编码让这个编码能表示另外的字母。

哈夫曼树如何避免左起字串问题

如原文ABC编码为10010011的情况,当计算机读到10时,由于有左起子串不冲突的原则。那么计算机完全可以保证当前的10就是A字母,然后再往下读010011的部分,然后当读到01时,也完全能确定B,C同理,而不用说因为会出现冲突的编码而接着继续读取之后的编码来确定前面的编码。

动态哈夫曼树

  • 待补充

静态哈夫曼树

  • 第一步:遍历整个列表,统计各个字符出现的频率
  • 第二步:根据字符出现的频率构建哈夫曼树,并编码,将编码信息存储起来
  • 第三步:解压,根据哈夫曼编码,从树中依次获取实际的存储的值

思考

  • 我觉得还是没有明白,如何将10010011很好的区分成10,01,0011?
    答:采用从前往后的匹配,保证每一个哈夫曼编码都是唯一不重复。
  • 如果将这一批的字符串找到所有的字母表频率,并编号为哈夫曼编码,如果让接收方也知道这些编码对应的字母意思呢?
    答:从左往右依次匹配,编码能保证所有的编码保证唯一,不会解析错误

erlang实现静态哈夫曼树

-module(huffman).
-author("Administrator").

-export([main/0]).

-record(huffman_node,{
    code = none :: binary(),  %% code编码—(二定制格式)
    val = 0,   %% 节点的值
    weight = 0  %% 节点权重
    ,lchild = none :: #huffman_node{} %% 左孩子  (huffman_code)
    ,rchild = none  :: #huffman_node{}%% 右孩子 (huffman_code)
}).

main()  ->
    A = [{a, 20},{b,5},{c,6},{d,8},{e,88},{f,98}, {ee,1},{ff,1222}],
    A1 = format_list(A),
    HuffmanTree = build_huffman_tree(A1, false),
    io:format("huffman tree = ~p ~n", [HuffmanTree]),
%%   根据huffman树,对数据进行编码 [{a, 二进制}]
    HuffmanCodesTree = huffman_code(HuffmanTree),
    io:format("huffman codes = ~p ~n", [HuffmanCodesTree]),
%%     对字符串进行编码,并转换成二进制格式
    Msg = [a,b,c],
    F = fun(Atom, BinaryOut) ->
            case lists:keyfind(Atom, 1, HuffmanCodesTree) of
                false ->
                    BinaryOut;
                {Atom, Val} ->
                    <<BinaryOut/bitstring,Val/bitstring>>
            end
        end,
    MsgHuffmanCode = lists:foldl(F, <<>>, Msg),
    io:format("MsgHuffmanCode codes = ~p ~n", [MsgHuffmanCode]),
%%  解码,针对哈夫曼树编码,解码信息
    DecodeHuffmanMsg = decode_huffman(HuffmanCodesTree, MsgHuffmanCode,1, []),
    DecodeHuffmanMsg_1 = lists:reverse(DecodeHuffmanMsg),
    io:format("DecodeHuffmanMsg_1 decodestr = ~p ~n", [DecodeHuffmanMsg_1]),

%%     测试数据发送
    PackCode = test_code_msg(HuffmanCodesTree, MsgHuffmanCode),
    test_unpack_code(PackCode),

    ok.

test_code_msg(HuffmanCodesTree, MsgHuffmanCode) ->
    PackMsg = pack_decode_msg(HuffmanCodesTree, MsgHuffmanCode),
    io:format("test code msg = ~p", [PackMsg]),
    PackMsg.

test_unpack_code(PackMsg) ->
    unpack_msg(PackMsg).


pack_decode_msg(HuffmanCodesTree, MsgHuffmanCode) ->

    HuffmanCodesTree_1 = term_to_binary(HuffmanCodesTree),
    io:format("pack HuffmanCodesTree_1 msg = ~p ~n", [HuffmanCodesTree_1]),
    D_a_t_a_2 = <<(byte_size(HuffmanCodesTree_1)):32,HuffmanCodesTree_1/binary,MsgHuffmanCode/bitstring>>,
    io:format("pack decode msg = ~p ~n", [D_a_t_a_2]),
    D_a_t_a_2.

unpack_msg(Bin) ->
    <<ByteSize:32,Rest/bitstring>> = Bin,
    AllByteLen = ByteSize,
    io:format("Huffman code ByteSize = ~p, Rest = ~p ~n", [ByteSize, Rest]),
    <<D_a_t_a:AllByteLen/binary, Rest_1/bitstring>> = Rest,

    D_a_t_a_1 = binary_to_term(D_a_t_a),
%%    HuffmanCodesTree = [{binary_to_atom(Atom,utf8), AtomCode} || <<Atom/binary, AtomCode/bitstring>> <- D_a_t_a_1],
%%    io:format("Huffman code tree = ~p, msgHuffmancode = ~p ~n", [D_a_t_a_1, MsgHuffmanCode]),
%%    <<MsgHuffmanCode/bitstring>> = Rest_1,
    io:format("Huffman code tree = ~p, MsgHuffmanCode = ~p ~n", [D_a_t_a_1, Rest_1]),
    ok.

format_list(List) ->
    lists:flatmap(fun({Val, Weight}) ->
        [#huffman_node{val = Val,weight = Weight}] end, List).

sort_huffman(#huffman_node{weight = Weight1}, #huffman_node{weight = Weight2}) ->
    Weight1 =< Weight2.

build_huffman_tree(List, true) ->
    List;
build_huffman_tree(List, false) ->
    List_1 = lists:sort(fun sort_huffman/2, List),
    if
        length(List_1) =< 1 ->
            build_huffman_tree(List_1, true);
        true ->
            [H1,H2|Rest] = List_1,
            Weight = H1#huffman_node.weight + H2#huffman_node.weight,
            List_2 = [#huffman_node{val = none, weight = Weight, lchild = H1, rchild = H2}|Rest],
            build_huffman_tree(List_2, false)
    end.

%% 获取huffman元素编码
huffman_code(HuffmanTree) ->
    case HuffmanTree of
        [] ->
            [];
        [#huffman_node{lchild = LChild, rchild = RChild}] ->
%%            LChild_1 = LChild#huffman_node{code = <<0/binary>>},
%%            RChild_1 = RChild#huffman_node{code = <<1/binary>>},
            LChild_1 = LChild#huffman_node{code = <<0:1>>},
            RChild_1 = RChild#huffman_node{code = <<1:1>>},
            HuffmanCodes_1 =  get_huffman_code(LChild_1),
            HuffmanCodes_2 = HuffmanCodes_1 ++ get_huffman_code(RChild_1),
            HuffmanCodes_3 = huffman_code_1(LChild_1,HuffmanCodes_2),
            huffman_code_1(RChild_1, HuffmanCodes_3)
    end.
%%
huffman_code_1(HuffmanTree, HuffmanCodes) ->
    case HuffmanTree of
        [] ->
            HuffmanCodes;
        #huffman_node{lchild = none, rchild = none} ->
            HuffmanCodes;
        #huffman_node{code = Code, lchild = LChild, rchild = RChild} ->
%%            LChild_1 = LChild#huffman_node{code = lists:concat([Code,"0"])},
%%            RChild_1 = RChild#huffman_node{code = lists:concat([Code,"1"])},
%%            Code_1 = <<0:8>>,
%%            A = <<0:1,Code_1/binary >> ,
%%            io:format("Code = ~p, A = ~p ~n", [Code, A]),

            LChild_1 = LChild#huffman_node{code = <<Code/bitstring,0:1>>},
            RChild_1 = RChild#huffman_node{code = <<Code/bitstring,1:1>>},
            HuffmanCodes_1 = HuffmanCodes ++ get_huffman_code(LChild_1),
            HuffmanCodes_2 = HuffmanCodes_1 ++ get_huffman_code(RChild_1),
            HuffmanCodes_3 = huffman_code_1(LChild_1, HuffmanCodes_2),
            huffman_code_1(RChild_1, HuffmanCodes_3)
    end.


get_huffman_code(HuffmanNode) ->
    case HuffmanNode of
        #huffman_node{val = Val, code = Code, lchild = none, rchild = none} ->
            [{Val, Code}];
        _ ->
            []
    end.

decode_huffman(_HuffmanCodeTree, <<>>, _N, Str) ->
    Str;
decode_huffman(HuffmanCode, HuffmanStr, N, Str) ->
    <<X:N/bitstring,Rest/bitstring>> = HuffmanStr,
%%    io:format("huffmanstr = ~p, N = ~p, Str = ~p, X = ~p,Rest = ~p ~n", [HuffmanStr, N, Str, X, Rest]),
    case lists:keyfind(X,2,HuffmanCode) of
        false ->
            decode_huffman(HuffmanCode, HuffmanStr, N+1, Str);
        {Val, X} ->
%%            io:format("decode val = ~p ~n", [Val]),
            decode_huffman(HuffmanCode, Rest, 1, [Val|Str])
    end.


erlang 二进制参考

参考

展开阅读全文
打赏
0
0 收藏
分享
加载中
更多评论
打赏
0 评论
0 收藏
0
分享
返回顶部
顶部