From 9a48ee6a5ee04fb841eb2c6c63db2e6d97bd3672 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matt=C3=A9o=20Delabre?= Date: Thu, 6 Oct 2016 18:43:57 +0200 Subject: [PATCH] =?UTF-8?q?Renommages=20pour=20consignes=20&=20d=C3=A9but?= =?UTF-8?q?=20impl=C3=A9mentation=20buildTree?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 4 ++-- compress | Bin 0 -> 8912 bytes include/huf.h | 28 ++++++++++++++++++++++++++++ main.c | 9 --------- src/freq.c | 29 ----------------------------- src/freq.h | 1 - src/freq.o | Bin 1896 -> 0 bytes src/huf.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/huf.o | Bin 0 -> 2136 bytes src/main.c | 24 ++++++++++++++++++++++++ src/main.o | Bin 944 -> 2080 bytes 11 files changed, 104 insertions(+), 41 deletions(-) create mode 100755 compress create mode 100644 include/huf.h delete mode 100644 main.c delete mode 100644 src/freq.c delete mode 100644 src/freq.h delete mode 100644 src/freq.o create mode 100644 src/huf.c create mode 100644 src/huf.o diff --git a/Makefile b/Makefile index d2f077d..efd3ce3 100644 --- a/Makefile +++ b/Makefile @@ -3,11 +3,11 @@ FLAGS=-Wall -std=c99 SRC=src PROG=compress -all: $(SRC)/main.o $(SRC)/freq.o +all: $(SRC)/main.o $(SRC)/huf.o $(CC) $^ $(FLAGS) -o $(PROG) %.o: %.cpp $(CC) $(FLAGS) -c $^ camille: - rm -f $(PROG) *.o *.gch + rm -f $(PROG) **/*.o **/*.gch diff --git a/compress b/compress new file mode 100755 index 0000000000000000000000000000000000000000..eccd93457ea79faefbb2eba632be08874530f6bc GIT binary patch literal 8912 zcmeHMZ){uD6~DIgkEGac1~jycJ^@8(MNFEdA^aJAPU7^XOWM#n&}mCw9Q!#jICij~ zyR=iG$fd)}S}L(={W59OHqlLJ8fenGeIN!*Fa-FBOeQL(d!8FKxVGKY*iW9|){6<9mF zfyrzmyNy)=QVEyLTaX$xqRYZG(HbG=09pZ8yWIfV?J}!`Sfh3wwUM3>@+@ z64u4n1^uMEruzukN&H|Nrg=jgwM?eq#{Ita_1ow#k%H<^|;v*_pc5gH?+}3(; zG}IK0#8MMY6YXtHZLOYU+|$D2M*AqcW7h!hiSj4`hwdtjfzqs_`$FE0FmoBJ&>arFj>q|)AQU?Eg8GK6_{2{<4xUA()07~_X!?6_3Ab&wj zR~rk*$F&$6Pw7cEo`}TsFbj`pdXS9;qS1JesVa)9Nj;Fz)v-V%2FS=*JSGs8?da|4 z=u}%gEuL-Ym0kRdnhmmud8Fr^S|5RtWoKVPQ7Fr!?a|Uh=ZUe7|I_guAitxfz zfyTxn9G@$d<|3TNIJUJH;l<-tF2b>IDoT2(MwzVi)k#cwc3Q8@rlCbi|E6Y^Bn+D0 z)&e5=O|5WuZIq#g{3tdqF65zX+JZcWv^dZCJCMgv7NT+z=ktu3d3AGIE4-Q1rR>U?GMgkRACSCuVVXv^3314 zftl+LFrZ{|%1m~0WUwj1oXQ{OVpqPz;V}oZa9|p+w z5$|bAf(1`WZ)d5>%P)Xl!PtkWke?r1JOnyehbJUw4qsF{|$Be9K|i2h&$vYkj#n-x3DAcpPq%z9r?9 za{Q*>A_9ubif#Ui%gsReeAwlgIaEtEeXyJ?$oJ*0xc1F(>!qZW$(qT5C2Yy`oda>- zxy;$Am%wxGboRAR^7*uH$@a|Qr98OdIuVAz!I>PmZHQ0(x*+@=5ay{mKPz(6tYi*f zm}2mL$exBqP=XLIVL)@fERxI8q%UgPM{BpN7J!lDX zqXhq;9AEP$daFzh0QfsFjk$+Mw#9epifjM5u&WU>41E+b_0-Flr9vVkNJMtdmqROa zzN<(sZ<+L6HHDtFJmCWsX3#-bcNeym!Fs?>)ZBSt-HfIo^bHZ%^i0Pv*_u%{Y>4l$lQ)*>C(C z>^%FvE^Yl2&9~mnd%c-!U0@-9(?umcYg6vOoO%xzqOS~kzv>ka z6qFh_g*Jocrf@Cu$Mrx|)EP@)9t$P6-L}E;U>ESXhA#grpT`Ai0>T*r8iVxU;QS_x zoC`o-fQh*bv;hWm1B{dBfDYkAg@W&CEVe^?m~Fys+pwm(=A^CKjW~XrxxwZZ6cDAf zJY#o3JIW7%FXJfdbay)&cDm}mR&#`HU;p5iFWhwp!YG4&{|xNjhCX<}a zY|u4t4*~pBh~WlPdqtuBD73!-u}uIi=-ZR1&Tan);ALpnu)V|Se%{{UY&hZYIi=$j z9nQvYRVvQrV^xZ?Jzd@BloQT&ud~_fZ0vAKAP4dt&KjN; z^?V%pQ+63iVb3?=FnIcAJYzTbT9Co=m_b;bpbdiFEGQBDz~cozpRmvzrVlw<$LJe$ z9hZVNQp8I*6Fm7%{fX@xBOaCs9?ydCAC_3oQ(kj|pBL?-yZHU8ZGCEMo2R+e)3VvXrIzNcZOv`X z_ghV@;vF#h+U;@)++NsM7dJX!hpCs8uPBTwvwS5xQ&PT)(YQ0qSF=*j%K@X9p0imV zMuo_y8LusjD>GilXnvXTHEh15og0~4;(0ijvv8hcw&P+nU(L9ijf;6~#@80kBg}X` zlWOH+h}RwLbA|a|+~|O}C(U~^elx@EhP61@EpYx)3SU>)zZUB_aMp==ZN_h9N=bY@ z!|ju`I2f&3D=j!*E7*tl32G|19@^My|8X6(L;jCgo(oz=eAxomSb?)N<5bz170ewm&)Mf^6(0`U(fpMWT>r_X{eUU4@#8k_xIQU zep<)Lp8>U%7!U6Xd3xV)7H+ z;+~jq7Xe>uzm@e{=F1}Bn71e3m;=iMzccm`%0mydo>U0j4v9s1JQl&izr}&pbDTq_ z+XdjD$@4NA2y|>sM74(h}N8B&ll&RUcD>(RfTtf>#KV zaVkjaCQ&?f1ph`;hL_!YynQ~^x2p@>G<>GINi)RMt}pNM_VsjvR8b2*$pi?FepR2M z8rGDqJxtxvySu~NtM2aZ-s|(Les4#w4}0L}pTT5`$H3Gcf1kRN9e-aZ=Effwj%xf6 zRhEknL{%*m&;vY0DP8MMXiuiJSTF*)G|pPh(){4n46QnTwKk)D<17|jH66^t(@MtG z(LgK&t_`Om(U3o(X%N6}kPk&-YAUHgl2+owB#C2RYcIper09en0rN9si_Fdg>K-v< zI-`MPl8+Zi4?QW&8;vPb)|e-Ga7+&j1Jx4-9i}g4GpS49t1JyCNLHWf)8;W%>#l%!vm>h9+*R8 zFfS~#{J&@My$(xK;l0srmw{XPpNsZ69E2On7MDALsfG)d3ts$oSx`<^twpq+8m)|N zg-WqLegEph;9Vprs8zoU@N2Laq5e#(f_+vc`J{H`>rB=`u=V} zqfo5!R{k#_gDz74w4akcr9OVg9<;N(kF`lpHtt(q7`D{zvnmfgECx!nx@!)QUM)Z;5p>+|bS*iYy Nhy%Ez7US0b{{v2lqmTdq literal 0 HcmV?d00001 diff --git a/include/huf.h b/include/huf.h new file mode 100644 index 0000000..50a1a73 --- /dev/null +++ b/include/huf.h @@ -0,0 +1,28 @@ +#ifndef __FREQ_H__ +#define __FREQ_H__ + +typedef struct Vertex Vertex; +struct Vertex { + // le double `freq` indique la fréquence d'apparition de la lettre + // du sommet dans le corpus, ou bien la somme des fréquences de ses fils + double frequency; + + // le caractère `value` est le caractère du corpus que ce sommet + // représente (caractère ASCII 0 - 255) + char value; + + // pointe vers le sommet parent à celui-ci (ou bien vers NULL s'il n'a + // pas de parent affecté, comme c'est le cas pour les sommets non-traités + // et pour la racine) + Vertex* parent; +}; + +// calcule un tableau des fréquences des caractères dans le corpus, +// indexé par la valeur numérique ASCII de chaque caractère +double* computeFrequencies(const char* filepath); + +// déduit du tableau de fréquences l'arbre de Huffman, représenté +// par une liste de sommets chaînés +Vertex* buildTree(double* frequencies); + +#endif diff --git a/main.c b/main.c deleted file mode 100644 index 9185482..0000000 --- a/main.c +++ /dev/null @@ -1,9 +0,0 @@ -#include "freq.h" -#include - -int main(int argc, char** argv) { - if (argc != 2) { - printf("Usage : compress \n") - printf("Paramètre fichier manquant.\n"); - } -} diff --git a/src/freq.c b/src/freq.c deleted file mode 100644 index b27e328..0000000 --- a/src/freq.c +++ /dev/null @@ -1,29 +0,0 @@ -#include "freq.h" -#include -#include - -double* computeFreq(const char* filepath) { - double* freq = (double*) malloc(256 * sizeof(double)); - int totalChars = 0; - - // initialisation du tableau à 0 - for (int i = 0; i < 256; i++) { - freq[i] = 0; - } - - // parcours du fichier et comptage des caractères - FILE* file = fopen(filepath, "r"); - char current; - - while ((current = fgetc(file)) != EOF) { - freq[(size_t) current]++; - totalChars++; - } - - // conversion des effectifs en fréquences - for (int i = 0; i < 256; i++) { - freq[i] /= totalChars; - } - - return freq; -} diff --git a/src/freq.h b/src/freq.h deleted file mode 100644 index a44cb1d..0000000 --- a/src/freq.h +++ /dev/null @@ -1 +0,0 @@ -double* computeFreq(const char* filepath); diff --git a/src/freq.o b/src/freq.o deleted file mode 100644 index 2a9d54d04cc92df43a6c6fbdbb7eaa3e7fe3d676..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1896 zcmbu9&r1|x7{{M+Ojpxp4G)1Gwo6&)Fe|!LhuV_SH;4#f$x}IQvkAGowmSj z)D3O=1CO<*V2FJtykD8SFP3%>3!7!LM!M=m$*i?xQcBcV@*7+0`A!;EN?>|lF?2Ui z@shdM(C&XuX@la78s=Mb^L>wp)xUfsH?$ldL|?xSljGx8BZG*U( zRdhKT(~VJmNrLT3vjpwfE@FcmslC@D{S@^rr) z8QoL3MGY`;3_o`ahg8mfq9-AlO)nH2djeOGw&v%vNl0ZE>WW-+FkDM<4NUiq$kd|1|Abv!Lgl93SW8EN9PKx??Xo zpg*)63-sB19`sz6FYrs#b_Vw{S;y8V?<9uvj+K0j0&=tG9;9+s+HRinpOXms(;M_x zHja!;)|0QeR?capdeR-mv3bo9Pjpd0x??&=zN|BXpBLQ!qHmEn!2Ea!H5Bt-{iEnh zG5@L^uJ4M2{BKz1j}q7Dp8`n@zHIh0!hftu_(WjWhsaHAl5qv^-vq$`?LEQ#`L7HA z%if?k_vb%_EU13(#D&T_QT@nQ`b*x2%zyWYU`s%Y79R6|7OnlclYCp|FGl*!bz$?L RPb}#CkboC(-)sK!{{W1L&{O~b diff --git a/src/huf.c b/src/huf.c new file mode 100644 index 0000000..635508d --- /dev/null +++ b/src/huf.c @@ -0,0 +1,50 @@ +#include "../include/huf.h" +#include +#include + +double* computeFrequencies(const char* filepath) { + double* frequencies = (double*) malloc(256 * sizeof(double)); + int totalChars = 0; + + // initialisation du tableau à 0 + for (int i = 0; i < 256; i++) { + frequencies[i] = 0; + } + + // parcours du fichier et comptage des caractères + FILE* file = fopen(filepath, "r"); + char current; + + while ((current = fgetc(file)) != EOF) { + frequencies[(size_t) current]++; + totalChars++; + } + + // conversion des effectifs en fréquences + for (int i = 0; i < 256; i++) { + frequencies[i] /= totalChars; + } + + return frequencies; +} + +Vertex* buildTree(double* frequencies) { + int vertexCount = 0; + + // on compte le nombre de sommets à allouer + // (uniquement les caractères qui apparaissent au moins une fois) + for (int i = 0; i < 256; i++) { + if (frequencies[i] > 0) { + vertexCount++; + } + } + + // on alloue la place pour 2n + 1 sommets (nombre total + // de sommets dans l'arbre binaire) + Vertex* tree = (Vertex*) malloc((2 * vertexCount - 1) * sizeof(Vertex)); + + // (à faire: algo pour remplir l'arbre) + // (trier le tableau des fréquences ?) + + return tree; +} diff --git a/src/huf.o b/src/huf.o new file mode 100644 index 0000000000000000000000000000000000000000..0f781c43117d76329c66bf6e5484ec2abab1b8a9 GIT binary patch literal 2136 zcmb_c&rcIU6n;xleh6+c9EfSxHxRBI=Lzvf2h|mRA<%@`vb}|Eg!%?y617Kmp+{F&*`6!Dta=-9 z*tM0_@eN07UqOijokzc7dkb@WhpDw3E0StqB+H6bm1KiO-T168w|ucPXMUMD5}^idb*auJ;`#D{XJ`M)Oi-#6qR@?W9gcCMhj7z1V2t8}(B zG_K%-IEOe5vL<>UH&1()>V{iSdg>LV*uHDF7hslV;jc@Q`&c8XdiY6)t};)}mk8j& zzgp%&rCR0z>G4Vg4>U--qos%5aSijp$tsi-&Pa9Iy+Q0O4sR73;QTT8#bfYZ#7ST1 z+Px*+$y7u$z=%g@QkFTAFrTK(m@#E0AsU*VjvEk(&zLcYJT@%@!l|k0iF*mtBrV74 zaB9Z!>(H@rjuR$#>k-F^e+q}-FG)D@Md#m0c#FhO(nX`W^ZIPm!O15qz!!wk5a$Oi z`8;Zc!Z=$AXPy+RggG73EOXWZZ6aiaKno|6pe5qY23r+1W9W;;EmIr46YNb|A>#=W zD9oIE7)gYp*rESpK9r#&yI)KIF|tM84`=6a#hT_$9MsbQ4%tfl>925*?u4Yoi|yd} z1&+JFlnqy2PM2P=gY@7;n*SQ#hq07OzEj-77SHegHbnizA)|jrMD^1~vRxiOC}FhZ z@SpjXgvr!acR}1K;sy6Uk9X6(91!1&;JfP|Lr1OqTi_pxI2YX?#fo@we{G1ldyf#j o + +int main(int argc, const char** argv) { + if (argc != 2) { + printf("Usage : compress \n"); + printf("Paramètre fichier manquant.\n"); + return 1; + } + + double* frequencies = computeFrequencies(argv[1]); + double sum = 0; + + for (int i = 0; i < 256; i++) { + if (frequencies[i] != 0) { + sum += frequencies[i]; + printf("%c (%d) : %f\n", i, i, frequencies[i]); + } + } + + printf("Total : %f\n", sum); + + return 0; +} diff --git a/src/main.o b/src/main.o index fac15ccd67e80fc97ca5f62054820d992b7daf35..42f25880a0b3558a037c4638fadceccfa1221ac3 100644 GIT binary patch literal 2080 zcmbtU&r1|h9RJR)rES`-Ar&Ywmkos+c10JHkWCq#Lq**<5mWb>k7KlJbt9;kWwRB~1TdUYx<;Cp~yZoLAJM=lymMQA)5Bz9V zbxSL+G-*}PD(enStEdOL=+Bk?Xsf!nuGYwPcL_gLwWht%mfAiLf{q88w6!0N0g^X! zQDt=@A~bgYBrfvD_V%L_t4{PFmR4=5W@@xj8QVa*x>5P)RPGe*R9IGP((}by9m|Nm z%GK+2EM^bOcIiIyGF7{|ma+6lhCCo=ONCj}uq^p{E}xyw8|EO~)=j;z_LiB3%rUv3 z7w2rf$P|cY<<9t27kb8X5x7%g`V7}$ba;3`?#xWuMP|!=O0SaWPT7u?e40!tiC(3r z%f;oML^72~C9Y8|0B!CB(!w}M^D(Ktxv}Xrwxk1b8YCUmq8sO8FG9mDr^6Q|q&Rrc zho}wki_YVigadC7Kp1~P62?eD5{6SjFh*%S0S~S>45yFriFw{rr?Yp&T5!aUv56au z$4|hapy!K9)|p_|W){#bu$eJp8gsT$%;pUXX3cz&<(zC6c+LpU-d(qIJcKi?{l=?D zB6D2u1&`_>@$-jV*ul@DF5-OVxQNrw?Qb4_vxnbEjYkpu5!ZxPj#FMQU*_wC(>N5x zdQxEeBq$a$-FaGQrZJ-{%$R4OOzBJqWzw=hF-xw2=U6a`xK=DNLm9o5>9&}jeT)Qh zGo~Np@M#%`dHz$8(Vw>Fzd>olNW{HyEte~FnyD{zyVE$l@{&J=PaLG4P#$rEFLC?| z$Newo9zSnRJDmLRRa~K%|LUJaUyAux@6Z&NUpP#Ux{1Y6c`{5OT{y!$%0EGYm delta 240 zcmZ1=uz`JohIj!Z0~|PjSq==G3~ZCvGFnf5<GX0xQd88zv)0rpby-qLXu&q$bZ} zl9=4aB+QnZn3<=SJh_}jmcJymqJ%*&C9x!tK`*Jec=84osmYI6>?Rv9u})3^8lb`| zGg*U4Wbz6&9!AB<3t7b(9e~D3gZb){A0qQ7KLDyvVH06;V3=IRrp9PGc_EuPV<}Ae q