all repos — mgba @ 81a52403a3583039f4e571f1516cd0efe4872c4b

mGBA Game Boy Advance Emulator

src/third-party/zlib/contrib/masmx86/inffas32.asm (view raw)

   1;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
   2; *
   3; * inffas32.asm is derived from inffas86.c, with translation of assembly code
   4; *
   5; * Copyright (C) 1995-2003 Mark Adler
   6; * For conditions of distribution and use, see copyright notice in zlib.h
   7; *
   8; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
   9; * Please use the copyright conditions above.
  10; *
  11; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
  12; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
  13; * the moment.  I have successfully compiled and tested this code with gcc2.96,
  14; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
  15; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
  16; * enabled.  I will attempt to merge the MMX code into this version.  Newer
  17; * versions of this and inffast.S can be found at
  18; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
  19; *
  20; * 2005 : modification by Gilles Vollant
  21; */
  22; For Visual C++ 4.x and higher and ML 6.x and higher
  23;   ml.exe is in directory \MASM611C of Win95 DDK
  24;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
  25;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
  26;
  27;
  28;   compile with command line option
  29;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm
  30
  31;   if you define NO_GZIP (see inflate.h), compile with
  32;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
  33
  34
  35; zlib1222sup is 0 for zlib 1.2.2.1 and lower
  36; zlib1222sup is 8 for zlib 1.2.2.2 and more (with addition of dmax and head
  37;        in inflate_state in inflate.h)
  38zlib1222sup      equ    8	; added to every inflate_state offset below except mode_state
  39
  40; Values _inflate_fast stores into state->mode on exit. GUNZIP defined, or NO_GUNZIP not defined, selects 11/26; only NO_GUNZIP without GUNZIP selects 3/17.
  41IFDEF GUNZIP
  42  INFLATE_MODE_TYPE    equ 11	; stored when an end-of-block code is decoded
  43  INFLATE_MODE_BAD     equ 26	; stored when an invalid code or distance is found
  44ELSE
  45  IFNDEF NO_GUNZIP
  46    INFLATE_MODE_TYPE    equ 11
  47    INFLATE_MODE_BAD     equ 26
  48  ELSE
  49    INFLATE_MODE_TYPE    equ 3
  50    INFLATE_MODE_BAD     equ 17
  51  ENDIF
  52ENDIF
  53
  54
  55; 75 "inffast.S"
  56;FILE "inffast.S"
  57
  58;;;GLOBAL _inflate_fast
  59
  60;;;SECTION .text
  61
  62
  63
  64	.586p
  65	.mmx	; the MMX decode loop below uses mm0-mm7
  66
  67	name	inflate_fast_x86
  68	.MODEL	FLAT
  69
  70_DATA			segment
  71inflate_fast_use_mmx:	; 1 = CPU not probed yet, 2 = use the MMX loop, 3 = use the integer loop (set by L_check_mmx)
  72	dd	1
  73
  74
  75_TEXT			segment
  76; String and table constants placed ahead of the code in this segment.
  77
  78
  79ALIGN 4
  80	db	'Fast decoding Code from Chris Anderson'	; unlabeled string, not referenced by the code in this file
  81	db	0
  82
  83ALIGN 4
  84invalid_literal_length_code_msg:	; address is stored at strm+24 by L_invalid_literal_length_code
  85	db	'invalid literal/length code'
  86	db	0
  87
  88ALIGN 4
  89invalid_distance_code_msg:	; address is stored at strm+24 by L_invalid_distance_code
  90	db	'invalid distance code'
  91	db	0
  92
  93ALIGN 4
  94invalid_distance_too_far_msg:	; address is stored at strm+24 by L_invalid_distance_too_far
  95	db	'invalid distance too far back'
  96	db	0
  97
  98; inflate_fast_mask[n] = (1 << n) - 1 for n = 0..32; the MMX path indexes it with a bit count to isolate the low n bits.
  99ALIGN 4
 100inflate_fast_mask:
 101dd	0
 102dd	1
 103dd	3
 104dd	7
 105dd	15
 106dd	31
 107dd	63
 108dd	127
 109dd	255
 110dd	511
 111dd	1023
 112dd	2047
 113dd	4095
 114dd	8191
 115dd	16383
 116dd	32767
 117dd	65535
 118dd	131071
 119dd	262143
 120dd	524287
 121dd	1048575
 122dd	2097151
 123dd	4194303
 124dd	8388607
 125dd	16777215
 126dd	33554431
 127dd	67108863
 128dd	134217727
 129dd	268435455
 130dd	536870911
 131dd	1073741823
 132dd	2147483647
 133dd	4294967295
 134; Byte offsets of the inflate_state fields used by _inflate_fast. Every offset except
 135; mode_state includes zlib1222sup; those from write_state onward carry a further +4.
 136mode_state	 equ	0	;/* state->mode	*/
 137wsize_state	 equ	(32+zlib1222sup)	;/* state->wsize */
 138write_state	 equ	(36+4+zlib1222sup)	;/* state->write */
 139window_state	 equ	(40+4+zlib1222sup)	;/* state->window */
 140hold_state	 equ	(44+4+zlib1222sup)	;/* state->hold	*/
 141bits_state	 equ	(48+4+zlib1222sup)	;/* state->bits	*/
 142lencode_state	 equ	(64+4+zlib1222sup)	;/* state->lencode */
 143distcode_state	 equ	(68+4+zlib1222sup)	;/* state->distcode */
 144lenbits_state	 equ	(72+4+zlib1222sup)	;/* state->lenbits */
 145distbits_state	 equ	(76+4+zlib1222sup)	;/* state->distbits */
 146
 147
 148;;SECTION .text
 149; 205 "inffast.S"
 150;GLOBAL	inflate_fast_use_mmx
 151
 152;SECTION .data
 153
 154
 155; GLOBAL inflate_fast_use_mmx:object
 156;.size inflate_fast_use_mmx, 4
 157; 226 "inffast.S"
 158;SECTION .text
 159; _inflate_fast -- x86-32 decode loop; the file header describes it as a hand tuned version of inffast.c.
    ;
    ; Stack arguments, addressed relative to esp after the prologue
    ; (return address + 4 register pushes + pushfd + 64 bytes of locals = 88):
    ;   [esp+88] = first argument, the stream pointer (called strm in these comments)
    ;   [esp+92] = second argument, used only to derive [esp+40]
    ; Stream fields touched, by offset: +0 input pointer, +4 input byte count,
    ; +12 output pointer, +16 output byte count, +24 message pointer, +28 state pointer.
    ; NOTE(review): these offsets presumably follow zlib's z_stream layout -- confirm against zlib.h.
    ;
    ; Local frame:
    ;   [esp+0]  lmask = (1 << lenbits) - 1       [esp+4]  dmask = (1 << distbits) - 1
    ;   [esp+8]  state->lencode                   [esp+12] state->distcode
    ;   [esp+16] end  = out + out_count - 257     [esp+20] last = in + in_count - 11
    ;   [esp+24] current match length             [esp+28] 12-byte scratch input buffer
    ;   [esp+40] beg  = out + out_count - second argument
    ;   [esp+44] saved input pointer              [esp+48] state->write
    ;   [esp+52] state->wsize                     [esp+56] state->window
    ;   [esp+60] output pointer at entry
    ;
    ; Registers in the integer loop: esi = input pointer, edi = output pointer,
    ; ebp = hold (bit buffer), ebx = number of valid bits in hold.
 160ALIGN 4
 161_inflate_fast proc near
 162.FPO (16, 4, 0, 0, 1, 0)
 163	push  edi
 164	push  esi
 165	push  ebp
 166	push  ebx
 167	pushfd
 168	sub  esp,64	; 64 bytes of locals, laid out as listed above
 169	cld	; string instructions below (lodsw/stosb/movsb) must run forward
 170
 171
 172
 173; esi = strm, edi = strm's state pointer
 174	mov  esi, [esp+88]
 175	mov  edi, [esi+28]
 176
 177
 178
 179
 180
 181
 182; last = input pointer + input count - 11
 183	mov  edx, [esi+4]
 184	mov  eax, [esi+0]
 185
 186	add  edx,eax
 187	sub  edx,11
 188
 189	mov  [esp+44],eax
 190	mov  [esp+20],edx
 191
 192	mov  ebp, [esp+92]
 193	mov  ecx, [esi+16]
 194	mov  ebx, [esi+12]
 195; beg = out + out_count - second argument
 196	sub  ebp,ecx
 197	neg  ebp
 198	add  ebp,ebx
 199; end = out + out_count - 257
 200	sub  ecx,257
 201	add  ecx,ebx
 202
 203	mov  [esp+60],ebx
 204	mov  [esp+40],ebp
 205	mov  [esp+16],ecx
 206; 285 "inffast.S"
 207	mov  eax, [edi+lencode_state]
 208	mov  ecx, [edi+distcode_state]
 209
 210	mov  [esp+8],eax
 211	mov  [esp+12],ecx
 212; lmask = (1 << lenbits) - 1
 213	mov  eax,1
 214	mov  ecx, [edi+lenbits_state]
 215	shl  eax,cl
 216	dec  eax
 217	mov  [esp+0],eax
 218; dmask = (1 << distbits) - 1
 219	mov  eax,1
 220	mov  ecx, [edi+distbits_state]
 221	shl  eax,cl
 222	dec  eax
 223	mov  [esp+4],eax
 224
 225	mov  eax, [edi+wsize_state]
 226	mov  ecx, [edi+write_state]
 227	mov  edx, [edi+window_state]
 228
 229	mov  [esp+52],eax
 230	mov  [esp+48],ecx
 231	mov  [esp+56],edx
 232
 233	mov  ebp, [edi+hold_state]
 234	mov  ebx, [edi+bits_state]
 235; 321 "inffast.S"
 236	mov  esi, [esp+44]
 237	mov  ecx, [esp+20]
 238	cmp  ecx,esi
 239	ja   L_align_long	; last > in: enough input to read straight from the caller's buffer
 240; last <= in: copy the remaining input into the scratch buffer, zero-fill it to 12 bytes, and decode from there.
 241	add  ecx,11
 242	sub  ecx,esi	; ecx = last + 11 - in = number of input bytes
 243	mov  eax,12
 244	sub  eax,ecx	; eax = padding bytes needed to reach 12
 245	lea  edi, [esp+28]
 246	rep movsb
 247	mov  ecx,eax
 248	xor  eax,eax
 249	rep stosb
 250	lea  esi, [esp+28]
 251	mov  [esp+20],esi	; last = scratch buffer; the exit code tests this to detect that the buffer was used
 252	jmp  L_is_aligned
 253
 254
 255L_align_long:	; feed input bytes into hold one at a time until esi is 4-byte aligned
 256	test  esi,3
 257	jz   L_is_aligned
 258	xor  eax,eax
 259	mov  al, [esi]	; eax = next input byte, zero-extended
 260	inc  esi
 261	mov  ecx,ebx	; shift count = bits already held
 262	add  ebx,8
 263	shl  eax,cl
 264	or  ebp,eax	; place the byte above the valid bits of hold
 265	jmp L_align_long
 266
 267L_is_aligned:
 268	mov  edi, [esp+60]	; edi = output pointer saved at entry (edi was used as scratch above)
 269; 366 "inffast.S"
 270L_check_mmx:	; dispatch on inflate_fast_use_mmx: 2 -> MMX loop, above 2 -> integer loop, below 2 -> probe the CPU first
 271	cmp  dword ptr [inflate_fast_use_mmx],2
 272	je   L_init_mmx
 273	ja   L_do_loop
 274; Probe: eax/ebx/ecx/edx are live decoder state, so save them around the CPUID work.
 275	push  eax
 276	push  ebx
 277	push  ecx
 278	push  edx
 279	pushfd
 280	mov  eax, [esp]	; eax = original EFLAGS
 281	xor  dword ptr [esp],0200000h	; flip bit 21 of the saved EFLAGS image (the ID flag per Intel's manuals)
 282
 283
 284
 285
 286	popfd
 287	pushfd
 288	pop  edx
 289	xor  edx,eax	; zero if the flipped bit did not stick
 290	jz   L_dont_use_mmx
 291	xor  eax,eax
 292	cpuid	; leaf 0: vendor string in ebx, edx, ecx
 293	cmp  ebx,0756e6547h	; "Genu"
 294	jne  L_dont_use_mmx
 295	cmp  ecx,06c65746eh	; "ntel"
 296	jne  L_dont_use_mmx
 297	cmp  edx,049656e69h	; "ineI"
 298	jne  L_dont_use_mmx
 299	mov  eax,1
 300	cpuid	; leaf 1: version info in eax, feature flags in edx
 301	shr  eax,8
 302	and  eax,15
 303	cmp  eax,6	; bits 8..11 of eax (the family field per Intel's manuals) must equal 6
 304	jne  L_dont_use_mmx
 305	test  edx,0800000h	; bit 23 of edx (the MMX feature flag per Intel's manuals)
 306	jnz  L_use_mmx
 307	jmp  L_dont_use_mmx
 308L_use_mmx:
 309	mov  dword ptr [inflate_fast_use_mmx],2
 310	jmp  L_check_mmx_pop
 311L_dont_use_mmx:
 312	mov  dword ptr [inflate_fast_use_mmx],3
 313L_check_mmx_pop:
 314	pop  edx
 315	pop  ecx
 316	pop  ebx
 317	pop  eax
 318	jmp  L_check_mmx	; re-dispatch now that the flag is 2 or 3
 319; 426 "inffast.S"
 320ALIGN 4
 321L_do_loop:
 322; 437 "inffast.S"
 323	cmp  bl,15
 324	ja   L_get_length_code	; more than 15 bits already held
 325; Refill: append the next 16 input bits above the valid bits of hold.
 326	xor  eax,eax
 327	lodsw
 328	mov  cl,bl
 329	add  bl,16
 330	shl  eax,cl
 331	or  ebp,eax
 332
 333L_get_length_code:
 334	mov  edx, [esp+0]
 335	mov  ecx, [esp+8]
 336	and  edx,ebp	; index = hold & lmask
 337	mov  eax, [ecx+edx*4]	; eax = lencode[index]
 338
 339L_dolen:
 340; eax = table entry: al = op, ah = number of bits in the code, bits 16..31 = value.
 341; Remove the code's bits from hold, then dispatch on op.
 342
 343
 344
 345
 346	mov  cl,ah
 347	sub  bl,ah
 348	shr  ebp,cl
 349
 350
 351
 352
 353
 354; op == 0: the value is a literal byte, written straight to the output.
 355	test  al,al
 356	jnz   L_test_for_length_base
 357
 358	shr  eax,16
 359	stosb
 360
 361L_while_test:
 362; Keep looping while out < end and in < last.
 363
 364	cmp  [esp+16],edi
 365	jbe  L_break_loop
 366
 367	cmp  [esp+20],esi
 368	ja   L_do_loop
 369	jmp  L_break_loop
 370
 371L_test_for_length_base:
 372; 502 "inffast.S"
 373	mov  edx,eax
 374	shr  edx,16	; edx = value field of the entry
 375	mov  cl,al	; cl = op
 376
 377	test  al,16
 378	jz   L_test_for_second_level_length	; op bit 4 clear: not a length base
 379	and  cl,15	; cl = number of extra length bits
 380	jz   L_save_len	; none: edx already holds the length
 381	cmp  bl,cl
 382	jae  L_add_bits_to_len	; hold already has enough bits
 383; Refill 16 bits; ch keeps the extra-bit count while cl is used as the shift count.
 384	mov  ch,cl
 385	xor  eax,eax
 386	lodsw
 387	mov  cl,bl
 388	add  bl,16
 389	shl  eax,cl
 390	or  ebp,eax
 391	mov  cl,ch
 392
 393L_add_bits_to_len:
 394	mov  eax,1
 395	shl  eax,cl
 396	dec  eax	; eax = (1 << cl) - 1
 397	sub  bl,cl
 398	and  eax,ebp	; eax = extra-bit value taken from hold
 399	shr  ebp,cl
 400	add  edx,eax	; edx = length base + extra bits
 401
 402L_save_len:
 403	mov  [esp+24],edx	; spill the match length; edx is reused for the distance
 404
 405
 406L_decode_distance:
 407; 549 "inffast.S"
 408	cmp  bl,15
 409	ja   L_get_distance_code	; more than 15 bits already held
 410; Refill: append the next 16 input bits above the valid bits of hold.
 411	xor  eax,eax
 412	lodsw
 413	mov  cl,bl
 414	add  bl,16
 415	shl  eax,cl
 416	or  ebp,eax
 417
 418L_get_distance_code:
 419	mov  edx, [esp+4]
 420	mov  ecx, [esp+12]
 421	and  edx,ebp	; index = hold & dmask
 422	mov  eax, [ecx+edx*4]	; eax = distcode[index]
 423
 424; eax = table entry: al = op, ah = number of bits in the code, bits 16..31 = value.
 425L_dodist:
 426	mov  edx,eax
 427	shr  edx,16	; edx = value field of the entry
 428	mov  cl,ah
 429	sub  bl,ah
 430	shr  ebp,cl	; remove the code's bits from hold
 431; 584 "inffast.S"
 432	mov  cl,al	; cl = op
 433
 434	test  al,16
 435	jz  L_test_for_second_level_dist	; op bit 4 clear: not a distance base
 436	and  cl,15	; cl = number of extra distance bits
 437	jz  L_check_dist_one	; none: edx already holds the distance
 438	cmp  bl,cl
 439	jae  L_add_bits_to_dist	; hold already has enough bits
 440; Refill 16 bits; ch keeps the extra-bit count while cl is used as the shift count.
 441	mov  ch,cl
 442	xor  eax,eax
 443	lodsw
 444	mov  cl,bl
 445	add  bl,16
 446	shl  eax,cl
 447	or  ebp,eax
 448	mov  cl,ch
 449
 450L_add_bits_to_dist:	; in: cl = number of extra distance bits (1..15), edx = distance base
 451	mov  eax,1
 452	shl  eax,cl
 453	dec  eax	; eax = (1 << cl) - 1
 454	sub  bl,cl	; bits -= extra-bit count
 455	and  eax,ebp	; eax = extra-bit value taken from hold
 456	shr  ebp,cl	; remove the extra bits from hold
 457	add  edx,eax	; edx = full match distance
    ; Falls through to L_check_window, which follows immediately.
 459
 460L_check_window:	; in: edx = match distance, [esp+24] = match length, edi = out
 461; 625 "inffast.S"
 462	mov  [esp+44],esi	; save the input pointer; esi becomes the copy source
 463	mov  eax,edi
 464	sub  eax, [esp+40]	; eax = out - beg
 465
 466	cmp  eax,edx
 467	jb   L_clip_window	; distance reaches back before beg
 468; Source lies entirely in the output buffer at out - dist.
 469	mov  ecx, [esp+24]
 470	mov  esi,edi
 471	sub  esi,edx
 472; The first three bytes are copied by hand, so this path relies on length >= 3; rep movsb copies the remaining length - 3.
 473	sub  ecx,3
 474	mov  al, [esi]
 475	mov  [edi],al
 476	mov  al, [esi+1]
 477	mov  dl, [esi+2]
 478	add  esi,3
 479	mov  [edi+1],al
 480	mov  [edi+2],dl
 481	add  edi,3
 482	rep movsb
 483
 484	mov  esi, [esp+44]	; restore the input pointer
 485	jmp  L_while_test
 486
 487ALIGN 4
 488L_check_dist_one:	; reached when the distance code has no extra bits; edx = distance
 489	cmp  edx,1
 490	jne  L_check_window
 491	cmp  [esp+40],edi
 492	je  L_check_window	; out == beg: take the general path
 493; Distance 1: the match is a run of the previous output byte, so fill instead of copying.
 494	dec  edi
 495	mov  ecx, [esp+24]
 496	mov  al, [edi]	; al = previous output byte
 497	sub  ecx,3
 498; Three bytes are stored by hand (relies on length >= 3); rep stosb writes the remaining length - 3.
 499	mov  [edi+1],al
 500	mov  [edi+2],al
 501	mov  [edi+3],al
 502	add  edi,4
 503	rep stosb
 504
 505	jmp  L_while_test
 506
 507ALIGN 4
 508L_test_for_second_level_length:
 509; in: al = cl = op (bit 4 clear), edx = value field, ebp = hold.
 510; Unless op bit 6 is set, the entry links to a sub-table:
 511; next entry = lencode[value + (hold & ((1 << op) - 1))].
 512
 513	test  al,64
 514	jnz   L_test_for_end_of_block
 515
 516	mov  eax,1
 517	shl  eax,cl
 518	dec  eax
 519	and  eax,ebp
 520	add  eax,edx
 521	mov  edx, [esp+8]
 522	mov  eax, [edx+eax*4]
 523	jmp  L_dolen
 524
 525ALIGN 4
 526L_test_for_second_level_dist:
 527; in: al = cl = op (bit 4 clear), edx = value field, ebp = hold.
 528; Unless op bit 6 is set, the entry links to a sub-table:
 529; next entry = distcode[value + (hold & ((1 << op) - 1))].
 530
 531	test  al,64
 532	jnz   L_invalid_distance_code
 533
 534	mov  eax,1
 535	shl  eax,cl
 536	dec  eax
 537	and  eax,ebp
 538	add  eax,edx
 539	mov  edx, [esp+12]
 540	mov  eax, [edx+eax*4]
 541	jmp  L_dodist
 542
 543ALIGN 4
 544L_clip_window:
 545; 721 "inffast.S"
    ; The match starts before beg, so its first bytes come from the sliding window.
    ; In:  eax = out - beg, edx = match distance, edi = out, [esp+24] = match length.
    ; Out (at L_do_copy1): esi = copy source, eax = bytes still to copy.
 546	mov  ecx,eax
 547	mov  eax, [esp+52]	; eax = wsize
 548	neg  ecx	; ecx = -(out - beg)
 549	mov  esi, [esp+56]	; esi = window
 550
 551	cmp  eax,edx
 552	jb   L_invalid_distance_too_far	; distance larger than the window size
 553
 554	add  ecx,edx	; ecx = dist - (out - beg) = bytes that must come from the window
 555	cmp  dword ptr [esp+48],0
 556	jne  L_wrap_around_window	; write != 0: window contents may wrap
 557
 558	sub  eax,ecx
 559	add  esi,eax	; esi = window + wsize - ecx
 560; 749 "inffast.S"
 561	mov  eax, [esp+24]
 562	cmp  eax,ecx
 563	jbe  L_do_copy1	; whole match lies inside the window
 564
 565	sub  eax,ecx	; eax = bytes left after the window part
 566	rep movsb	; copy the window part
 567	mov  esi,edi
 568	sub  esi,edx	; the rest comes from the output at out - dist
 569	jmp  L_do_copy1
 579
 580L_wrap_around_window:	; in: ecx = bytes needed from the window, esi = window, edx = distance, edi = out
 581; 793 "inffast.S"
 582	mov  eax, [esp+48]	; eax = write
 583	cmp  ecx,eax
 584	jbe  L_contiguous_in_window	; needed bytes all lie below the write index
 585; Source starts near the end of the window: esi = window + wsize + write - ecx.
 586	add  esi, [esp+52]
 587	add  esi,eax
 588	sub  esi,ecx
 589	sub  ecx,eax	; ecx = bytes available before the end of the window
 590
 591
 592	mov  eax, [esp+24]
 593	cmp  eax,ecx
 594	jbe  L_do_copy1	; match fits in the tail part
 595
 596	sub  eax,ecx
 597	rep movsb	; copy the tail part
 598	mov  esi, [esp+56]	; continue from the start of the window
 599	mov  ecx, [esp+48]	; up to write bytes are available there
 600	cmp  eax,ecx
 601	jbe  L_do_copy1
 602
 603	sub  eax,ecx
 604	rep movsb
 605	mov  esi,edi
 606	sub  esi,edx	; the rest comes from the output at out - dist
 607	jmp  L_do_copy1
 608
 609L_contiguous_in_window:	; in: eax = write, ecx = bytes needed from the window, esi = window
 610; 836 "inffast.S"
 611	add  esi,eax
 612	sub  esi,ecx	; esi = window + write - ecx
 613
 614
 615	mov  eax, [esp+24]
 616	cmp  eax,ecx
 617	jbe  L_do_copy1	; whole match lies inside the window
 618
 619	sub  eax,ecx
 620	rep movsb	; copy the window part
 621	mov  esi,edi
 622	sub  esi,edx	; the rest comes from the output at out - dist
 623
 624L_do_copy1:	; in: esi = source, eax = bytes still to copy
 625; 862 "inffast.S"
 626	mov  ecx,eax
 627	rep movsb
 628
 629	mov  esi, [esp+44]	; restore the input pointer saved by L_check_window
 630	jmp  L_while_test
 631; 878 "inffast.S"
 632ALIGN 4
 633L_init_mmx:
 634	emms
 635; Register roles in the MMX loop:
 636;   mm0 = hold, ebp = number of valid bits, mm1 = bit count still to be shifted out of mm0,
 637;   mm4 = lmask (pristine copy in mm3), mm5 = dmask (pristine copy in mm2),
 638;   ebx = lencode table (reloaded from [esp+8] after each distance decode),
 639;   esi = input pointer, edi = output pointer.
 640	movd mm0,ebp
 641	mov  ebp,ebx
 642; 896 "inffast.S"
 643	movd mm4,dword ptr [esp+0]
 644	movq mm3,mm4
 645	movd mm5,dword ptr [esp+4]
 646	movq mm2,mm5
 647	pxor mm1,mm1	; nothing pending to shift out yet
 648	mov  ebx, [esp+8]
 649	jmp  L_do_loop_mmx
 650
 651ALIGN 4
 652L_do_loop_mmx:
 653	psrlq mm0,mm1	; shift out the bits consumed by the previous code
 654
 655	cmp  ebp,32
 656	ja  L_get_length_code_mmx	; more than 32 bits already held
 657; Refill: append the next 32 input bits above the valid bits of hold.
 658	movd mm6,ebp
 659	movd mm7,dword ptr [esi]
 660	add  esi,4
 661	psllq mm7,mm6
 662	add  ebp,32
 663	por mm0,mm7
 664
 665L_get_length_code_mmx:
 666	pand mm4,mm0
 667	movd eax,mm4	; eax = hold & lmask
 668	movq mm4,mm3	; restore lmask for the next iteration
 669	mov  eax, [ebx+eax*4]	; eax = lencode[index]
 670
 671L_dolen_mmx:	; eax = table entry: al = op, ah = number of bits in the code, bits 16..31 = value
 672	movzx  ecx,ah
 673	movd mm1,ecx	; defer the shift: the next psrlq mm0,mm1 removes the code's bits
 674	sub  ebp,ecx
 675
 676	test  al,al
 677	jnz L_test_for_length_base_mmx
 678; op == 0: the value is a literal byte, written straight to the output.
 679	shr  eax,16
 680	stosb
 681
 682L_while_test_mmx:
 683; Keep looping while out < end and in < last.
 684
 685	cmp  [esp+16],edi
 686	jbe L_break_loop
 687
 688	cmp  [esp+20],esi
 689	ja L_do_loop_mmx
 690	jmp L_break_loop
 691
 692L_test_for_length_base_mmx:
 693; The match length is kept in edx on the MMX path rather than spilled to [esp+24].
 694	mov  edx,eax
 695	shr  edx,16	; edx = value field of the entry
 696
 697	test  al,16
 698	jz  L_test_for_second_level_length_mmx	; op bit 4 clear: not a length base
 699	and  eax,15	; eax = number of extra length bits
 700	jz L_decode_distance_mmx	; none: edx already holds the length
 701
 702	psrlq mm0,mm1	; remove the length code's bits
 703	movd mm1,eax	; the extra bits are shifted out later
 704	movd ecx,mm0
 705	sub  ebp,eax
 706	and  ecx, [inflate_fast_mask+eax*4]	; ecx = low eax bits of hold
 707	add  edx,ecx	; edx = length base + extra bits
 708
 709L_decode_distance_mmx:
 710	psrlq mm0,mm1	; shift out the bits still pending from the length decode
 711
 712	cmp  ebp,32
 713	ja L_get_dist_code_mmx	; more than 32 bits already held
 714; Refill: append the next 32 input bits above the valid bits of hold.
 715	movd mm6,ebp
 716	movd mm7,dword ptr [esi]
 717	add  esi,4
 718	psllq mm7,mm6
 719	add  ebp,32
 720	por mm0,mm7
 721
 722L_get_dist_code_mmx:
 723	mov  ebx, [esp+12]	; ebx = distcode table (replaces the lencode pointer for now)
 724	pand mm5,mm0
 725	movd eax,mm5	; eax = hold & dmask
 726	movq mm5,mm2	; restore dmask for the next iteration
 727	mov  eax, [ebx+eax*4]	; eax = distcode[index]
 728
 729L_dodist_mmx:
 730; eax = table entry: al = op, ah = number of bits in the code, bits 16..31 = value.
 731	movzx  ecx,ah
 732	mov  ebx,eax
 733	shr  ebx,16	; ebx = value field of the entry
 734	sub  ebp,ecx
 735	movd mm1,ecx	; defer removal of the code's bits
 736
 737	test  al,16
 738	jz L_test_for_second_level_dist_mmx	; op bit 4 clear: not a distance base
 739	and  eax,15	; eax = number of extra distance bits
 740	jz L_check_dist_one_mmx	; none: ebx already holds the distance
 741
 742L_add_bits_to_dist_mmx:
 743	psrlq mm0,mm1	; remove the distance code's bits
 744	movd mm1,eax	; the extra bits are shifted out later
 745	movd ecx,mm0
 746	sub  ebp,eax
 747	and  ecx, [inflate_fast_mask+eax*4]	; ecx = low eax bits of hold
 748	add  ebx,ecx	; ebx = full match distance
 749
 750L_check_window_mmx:	; in: ebx = match distance, edx = match length, edi = out
 751	mov  [esp+44],esi	; save the input pointer; esi becomes the copy source
 752	mov  eax,edi
 753	sub  eax, [esp+40]	; eax = out - beg
 754
 755	cmp  eax,ebx
 756	jb L_clip_window_mmx	; distance reaches back before beg
 757; Source lies entirely in the output buffer at out - dist.
 758	mov  ecx,edx
 759	mov  esi,edi
 760	sub  esi,ebx
 761; The first three bytes are copied by hand, so this path relies on length >= 3; rep movsb copies the remaining length - 3.
 762	sub  ecx,3
 763	mov  al, [esi]
 764	mov  [edi],al
 765	mov  al, [esi+1]
 766	mov  dl, [esi+2]	; overwrites the low byte of the length, which is already in ecx
 767	add  esi,3
 768	mov  [edi+1],al
 769	mov  [edi+2],dl
 770	add  edi,3
 771	rep movsb
 772
 773	mov  esi, [esp+44]	; restore the input pointer
 774	mov  ebx, [esp+8]	; restore ebx = lencode table
 775	jmp  L_while_test_mmx
 776
 777ALIGN 4
 778L_check_dist_one_mmx:	; reached when the distance code has no extra bits; ebx = distance, edx = length
 779	cmp  ebx,1
 780	jne  L_check_window_mmx
 781	cmp  [esp+40],edi
 782	je   L_check_window_mmx	; out == beg: take the general path
 783; Distance 1: the match is a run of the previous output byte, so fill instead of copying.
 784	dec  edi
 785	mov  ecx,edx
 786	mov  al, [edi]	; al = previous output byte
 787	sub  ecx,3
 788; Three bytes are stored by hand (relies on length >= 3); rep stosb writes the remaining length - 3.
 789	mov  [edi+1],al
 790	mov  [edi+2],al
 791	mov  [edi+3],al
 792	add  edi,4
 793	rep stosb
 794
 795	mov  ebx, [esp+8]	; restore ebx = lencode table
 796	jmp  L_while_test_mmx
 797
 798ALIGN 4
 799L_test_for_second_level_length_mmx:	; in: al = op (bit 4 clear), edx = value field, ebx = lencode table
 800	test  al,64
 801	jnz L_test_for_end_of_block
 802; Sub-table link: next entry = lencode[value + (hold & mask[op & 15])].
 803	and  eax,15
 804	psrlq mm0,mm1	; remove the first-level code's bits before indexing
 805	movd ecx,mm0
 806	and  ecx, [inflate_fast_mask+eax*4]
 807	add  ecx,edx
 808	mov  eax, [ebx+ecx*4]
 809	jmp L_dolen_mmx
 810
 811ALIGN 4
 812L_test_for_second_level_dist_mmx:	; in: al = op (bit 4 clear), ebx = value field
 813	test  al,64
 814	jnz L_invalid_distance_code
 815; Sub-table link: next entry = distcode[value + (hold & mask[op & 15])].
 816	and  eax,15
 817	psrlq mm0,mm1	; remove the first-level code's bits before indexing
 818	movd ecx,mm0
 819	and  ecx, [inflate_fast_mask+eax*4]
 820	mov  eax, [esp+12]
 821	add  ecx,ebx
 822	mov  eax, [eax+ecx*4]
 823	jmp  L_dodist_mmx
 824
 825ALIGN 4
 826L_clip_window_mmx:
    ; The match starts before beg, so its first bytes come from the sliding window.
    ; In:  eax = out - beg, ebx = match distance, edx = match length, edi = out.
    ; Out (at L_do_copy1_mmx): esi = copy source, edx = bytes still to copy.
 827
 828	mov  ecx,eax
 829	mov  eax, [esp+52]	; eax = wsize
 830	neg  ecx	; ecx = -(out - beg)
 831	mov  esi, [esp+56]	; esi = window
 832
 833	cmp  eax,ebx
 834	jb  L_invalid_distance_too_far	; distance larger than the window size
 835
 836	add  ecx,ebx	; ecx = dist - (out - beg) = bytes that must come from the window
 837	cmp  dword ptr [esp+48],0
 838	jne  L_wrap_around_window_mmx	; write != 0: window contents may wrap
 839
 840	sub  eax,ecx
 841	add  esi,eax	; esi = window + wsize - ecx
 842
 843	cmp  edx,ecx
 844	jbe  L_do_copy1_mmx	; whole match lies inside the window
 845
 846	sub  edx,ecx	; edx = bytes left after the window part
 847	rep movsb	; copy the window part
 848	mov  esi,edi
 849	sub  esi,ebx	; the rest comes from the output at out - dist
 850	jmp  L_do_copy1_mmx
 860
 861L_wrap_around_window_mmx:	; in: ecx = bytes needed from the window, esi = window, ebx = distance, edx = length
 862
 863	mov  eax, [esp+48]	; eax = write
 864	cmp  ecx,eax
 865	jbe  L_contiguous_in_window_mmx	; needed bytes all lie below the write index
 866; Source starts near the end of the window: esi = window + wsize + write - ecx.
 867	add  esi, [esp+52]
 868	add  esi,eax
 869	sub  esi,ecx
 870	sub  ecx,eax	; ecx = bytes available before the end of the window
 871
 872
 873	cmp  edx,ecx
 874	jbe  L_do_copy1_mmx	; match fits in the tail part
 875
 876	sub  edx,ecx
 877	rep movsb	; copy the tail part
 878	mov  esi, [esp+56]	; continue from the start of the window
 879	mov  ecx, [esp+48]	; up to write bytes are available there
 880	cmp  edx,ecx
 881	jbe  L_do_copy1_mmx
 882
 883	sub  edx,ecx
 884	rep movsb
 885	mov  esi,edi
 886	sub  esi,ebx	; the rest comes from the output at out - dist
 887	jmp  L_do_copy1_mmx
 888
 889L_contiguous_in_window_mmx:	; in: eax = write, ecx = bytes needed from the window, esi = window
 890
 891	add  esi,eax
 892	sub  esi,ecx	; esi = window + write - ecx
 893
 894
 895	cmp  edx,ecx
 896	jbe  L_do_copy1_mmx	; whole match lies inside the window
 897
 898	sub  edx,ecx
 899	rep movsb	; copy the window part
 900	mov  esi,edi
 901	sub  esi,ebx	; the rest comes from the output at out - dist
 902
 903L_do_copy1_mmx:	; in: esi = source, edx = bytes still to copy
 904
 905
 906	mov  ecx,edx
 907	rep movsb
 908
 909	mov  esi, [esp+44]	; restore the input pointer saved by L_check_window_mmx
 910	mov  ebx, [esp+8]	; restore ebx = lencode table
 911	jmp  L_while_test_mmx
 912; 1174 "inffast.S"
 913L_invalid_distance_code:
 914; Exit paths shared by the integer and MMX loops. Each one loads
 915; ecx = message address (0 for none) and edx = new state->mode value,
 916; then jumps to L_update_stream_state.
 917
 918
 919	mov  ecx, invalid_distance_code_msg
 920	mov  edx,INFLATE_MODE_BAD
 921	jmp  L_update_stream_state
 922
 923L_test_for_end_of_block:
 924; in: al = op with bit 6 set. Bit 5 set means end of block; otherwise the code is invalid.
 925
 926
 927
 928
 929	test  al,32
 930	jz  L_invalid_literal_length_code
 931
 932	mov  ecx,0
 933	mov  edx,INFLATE_MODE_TYPE
 934	jmp  L_update_stream_state
 935
 936L_invalid_literal_length_code:
 937
 938
 939
 940
 941
 942	mov  ecx, invalid_literal_length_code_msg
 943	mov  edx,INFLATE_MODE_BAD
 944	jmp  L_update_stream_state
 945
 946L_invalid_distance_too_far:
 947; Reached from the clip-window code, where esi already points into the window,
 948; so the input pointer is reloaded from its save slot first.
 949
 950	mov  esi, [esp+44]
 951	mov  ecx, invalid_distance_too_far_msg
 952	mov  edx,INFLATE_MODE_BAD
 953	jmp  L_update_stream_state
 954
 955L_update_stream_state:
 956; in: ecx = message address or 0, edx = value for state->mode.
 957	mov  eax, [esp+88]	; eax = strm
 958	test  ecx,ecx
 959	jz  L_skip_msg
 960	mov  [eax+24],ecx	; store the message pointer in the stream
 961L_skip_msg:
 962	mov  eax, [eax+28]	; eax = state pointer
 963	mov  [eax+mode_state],edx
 964	jmp  L_break_loop
 965
 966ALIGN 4
 967L_break_loop:
 968; 1243 "inffast.S"
 969	cmp  dword ptr [inflate_fast_use_mmx],2
 970	jne  L_update_next_in
 971
 972; MMX loop keeps the bit count in ebp; move it to ebx so the code below is shared.
 973
 974	mov  ebx,ebp
 975
 976L_update_next_in:
 977; 1266 "inffast.S"
 978	mov  eax, [esp+88]	; eax = strm
 979	mov  ecx,ebx
 980	mov  edx, [eax+28]	; edx = state pointer
 981	shr  ecx,3	; ecx = whole unused bytes held in the bit buffer
 982	sub  esi,ecx	; hand those bytes back to the input
 983	shl  ecx,3
 984	sub  ebx,ecx	; bits left = bits mod 8
 985	mov  [eax+12],edi	; store the output pointer back into the stream
 986	mov  [edx+bits_state],ebx
 987	mov  ecx,ebx
 988
 989	lea  ebx, [esp+28]
 990	cmp  [esp+20],ebx
 991	jne  L_buf_not_used	; last != scratch buffer: input was read directly
 992; Scratch buffer was used: translate esi back into the caller's buffer and recompute last from the stream fields.
 993	sub  esi,ebx
 994	mov  ebx, [eax+0]
 995	mov  [esp+20],ebx
 996	add  esi,ebx
 997	mov  ebx, [eax+4]
 998	sub  ebx,11
 999	add  [esp+20],ebx
1000
1001L_buf_not_used:
1002	mov  [eax+0],esi	; store the input pointer back into the stream
1003
1004	mov  ebx,1
1005	shl  ebx,cl
1006	dec  ebx	; ebx = (1 << bits) - 1, mask for the bits that remain valid
1007
1008
1009
1010
1011
1012	cmp  dword ptr [inflate_fast_use_mmx],2
1013	jne  L_update_hold
1014
1015
1016; MMX loop: apply the pending shift, move hold back to ebp, and leave MMX state.
1017	psrlq mm0,mm1
1018	movd ebp,mm0
1019
1020	emms
1021
1022L_update_hold:
1023
1024
1025
1026	and  ebp,ebx
1027	mov  [edx+hold_state],ebp
1028
1029
1030
1031; Input byte count = last - in + 11; both branches compute the same value.
1032	mov  ebx, [esp+20]
1033	cmp  ebx,esi
1034	jbe  L_last_is_smaller
1035
1036	sub  ebx,esi
1037	add  ebx,11
1038	mov  [eax+4],ebx
1039	jmp  L_fixup_out
1040L_last_is_smaller:
1041	sub  esi,ebx
1042	neg  esi
1043	add  esi,11
1044	mov  [eax+4],esi
1045
1046
1047
1048
1049L_fixup_out:
1050; Output byte count = end - out + 257; both branches compute the same value.
1051	mov  ebx, [esp+16]
1052	cmp  ebx,edi
1053	jbe  L_end_is_smaller
1054
1055	sub  ebx,edi
1056	add  ebx,257
1057	mov  [eax+16],ebx
1058	jmp  L_done
1059L_end_is_smaller:
1060	sub  edi,ebx
1061	neg  edi
1062	add  edi,257
1063	mov  [eax+16],edi
1064
1065
1066
1067
1068; Epilogue: release the locals and undo the prologue pushes in reverse order.
1069L_done:
1070	add  esp,64
1071	popfd
1072	pop  ebx
1073	pop  ebp
1074	pop  esi
1075	pop  edi
1076	ret
1077_inflate_fast endp
1078
1079_TEXT	ends
1080end