; _______________________________________________________________________________________
;|                                                                                       |
;| ..:: Fresh IDE ::..  template project.                                                |
;|_______________________________________________________________________________________|
;
;  Description: FreshLib portable console application.
;
;  Target OS: Any, supported by FreshLib
;
;  Dependencies: FreshLib
;
;  Notes:
;_________________________________________________________________________________________

include "%lib%/freshlib.inc"

@BinaryType console, default

options.DebugMode = 0
options.AlignCode = 1
options.FastEnter = 1

include "%lib%/freshlib.asm"

uglobal
  cnt dd ?
  total dd ?
  hfile dd ?
endg


start:
        InitializeAll

;        stdcall StrDupMem, txt "Testaaa"
;        mov     ecx, eax
;
;        stdcall StrDupMem, txt "Test"
;        mov     edx, eax
;
;        stdcall Levenshtein, ecx, edx, 100
;        jmp     .finalize

        stdcall ReadDictionary, txt "dd/Dataset.csv"

        stdcall FileWriteString, [STDOUT], txt "Dictionary (SetA) length: "
        stdcall NumToStr, ecx, ntsDec or ntsUnsigned
        push    eax
        stdcall FileWriteString, [STDOUT], eax
        stdcall StrDel ; from the stack
        stdcall FileWriteString, [STDOUT], <txt 13, 10, 13, 10>


        stdcall FileOpen, "dd/t2.csv"
        mov     [hfile], eax

.loop_out:
        stdcall FileReadLine, [hfile]
        test    eax, eax
        jz      .end_of_file2

        mov     edi, eax

        stdcall GetTimestamp
        push    eax

        stdcall ComputeMinDist, edx, ecx, edi
        mov     esi, eax

        stdcall GetTimestamp
        sub     eax, [esp]
        mov     [esp], eax

        stdcall NumToStr, [cnt], ntsDec or ntsUnsigned
        push    eax
        stdcall FileWriteString, [STDOUT], eax
        stdcall StrDel ; from the stack

        inc     [cnt]

        stdcall FileWriteString, [STDOUT], txt ": Dist: "

        stdcall NumToStr, esi, ntsDec or ntsUnsigned
        push    eax
        stdcall FileWriteString, [STDOUT], eax
        stdcall StrDel ; from the stack

        stdcall FileWriteString, [STDOUT], txt ", Time: "

        pop     eax
        add     [total], eax

        stdcall NumToStr, eax, ntsDec or ntsSigned
        push    eax
        stdcall FileWriteString, [STDOUT], eax
        stdcall StrDel ; from the stack
        stdcall FileWriteString, [STDOUT], <txt " ms, Index: ">

        stdcall NumToStr, ebx, ntsDec or ntsSigned
        push    eax
        stdcall FileWriteString, [STDOUT], eax
        stdcall StrDel ; from the stack
        stdcall FileWriteString, [STDOUT], <txt 13, 10>


        stdcall StrDel, edi
        jmp     .loop_out


.end_of_file2:
        stdcall FileClose, [hfile]

        stdcall FileWriteString, [STDOUT], <txt 13, 10, "Total time spend: ">
        stdcall NumToStr, [total], ntsDec or ntsSigned
        push    eax
        stdcall FileWriteString, [STDOUT], eax
        stdcall StrDel ; from the stack
        stdcall FileWriteString, [STDOUT], <txt " ms", 13, 10>

.finalize:
        FinalizeAll
        stdcall TerminateAll, 0




proc Levenshtein, .pString1, .len1, .pString2, .len2, .Limit
.buff0  rb 256
begin
        push    ebx ecx edx esi edi

        movd    mm2, [.pString2]

        mov     edi, [.pString1]
        mov     ecx, [.len1]

; scan the equal prefix

        movd    esi, mm2
        mov     edx, [.len2]

.scan_prefix:
        cmpsb
        jne     .end_prefix

        dec     ecx
        dec     edx
        jmp     .scan_prefix

.end_prefix:
        dec     esi
        dec     edi

        movd    mm2, esi

; scan for the equal suffix.

.scan_suffix:
        dec     edx
        dec     ecx
        mov     al, [esi+edx]
        cmp     al, [edi+ecx]
        je      .scan_suffix

        inc     ecx
        inc     edx
        mov     [.len2], edx

        cmp     ecx, 0
        cmovle  eax, edx
        jle     .finish

        cmp     edx, 0
        cmovle  eax, ecx
        jle     .finish

; Init the matrix column

        xor     eax, eax
        inc     eax

.init0:
        mov     [.buff0 + edx], al
        inc     eax
        dec     edx
        jns     .init0

; Start of the distance computing

        xor     ebx, ebx

.loop1:
        mov     edx, [.len2]

        mov     bl, [.buff0 + edx]
        inc     [.buff0 + edx]

        mov     al, [edi]
        inc     edi

        mov     ah, al
        movd    mm0, eax
        punpcklbw mm0, mm0              ; 00 00 00 00 al al al al

        movd    esi, mm2

.loop2:
        movd    mm1, [esi]
        lea     esi, [esi+4]

        pcmpeqb mm1, mm0
        movd    eax, mm1
        mov     ch,  4

.loop3:
        dec     edx
        js      .exit

        add     bl, al     ; bl = D[i-1, j-1]+1; al == (0, -1)

        mov     al, [.buff0 + edx + 1]              ; [.buff + edx - 1] == D[i, j-1] + 1
        cmp     bl, al
        cmova   ebx, eax

        mov     al, [.buff0 + edx]          ; [.buff + edx] == D[i-1, j] + 1
        cmp     bl, al
        cmova   ebx, eax

        inc      bl

        mov     [.buff0 + edx], bl          ; xchg    bl, [.buff + edx]       ; [.buff + edx] = D[i, j]+1
        mov     bl, al                      ;

        shr     eax, 8

        dec     ch
        jnz     .loop3
        jmp     .loop2

; check the limit.

.exit:
        mov     al, [.buff0]
        sub     al, cl
        jns     @f
        neg     al
@@:
        cmp     al, byte [.Limit]
        jae     .finish

        dec     cl
        jnz     .loop1

.finish:
        movzx   eax, al
        pop     edi esi edx ecx ebx
        return
endp



proc Levenshtein2, .pString1, .len1, .pString2, .len2, .Limit
.buff0  rb 256
begin
        push    ebx ecx edx esi edi

; scan the equal prefix

        mov     edi, [.pString1]
        mov     ecx, [.len1]

        mov     esi, [.pString2]
        mov     edx, [.len2]

.scan_prefix:
        cmpsb
        jne     .end_prefix

        dec     ecx
        dec     edx
        jmp     .scan_prefix

.end_prefix:
        dec     esi
        dec     edi

        mov     [.pString2], esi

; scan for the equal suffix.

.scan_suffix:
        dec     edx
        dec     ecx
        mov     al, [esi+edx]
        cmp     al, [edi+ecx]
        je      .scan_suffix

        inc     ecx
        inc     edx
        mov     [.len2], edx

        cmp     ecx, 0
        cmovle  eax, edx
        jle     .finish

        cmp     edx, 0
        cmovle  eax, ecx
        jle     .finish

; Init the matrix column

        xor     eax, eax
        inc     eax

.init0:
        mov     [.buff0 + edx], al
        inc     eax
        dec     edx
        jns     .init0

; Start of the distance computing

        xor     ebx, ebx

.loop1:
        mov     edx, [.len2]

        mov     bl, [.buff0 + edx]
        inc     [.buff0 + edx]

        mov     ch, [edi]
        inc     edi

        mov     esi, [.pString2]

.loop2:
        cmp     ch, [esi]
        lea     esi, [esi+1]
        sete    al

        sub     bl, al                          ; bl = D[i-1, j-1]+1; al == (0, 1)

        mov     al, [.buff0 + edx]              ; [.buff + edx] == D[i, j-1] + 1
        cmp     bl, al
        cmova   ebx, eax

        mov     al, [.buff0 + edx - 1]          ; [.buff + edx - 1] == D[i-1, j] + 1
        cmp     bl, al
        cmova   ebx, eax

        inc      bl

        mov     [.buff0 + edx - 1], bl          ; xchg    bl, [.buff + edx - 1]       ; [.buff + edx - 1] = D[i, j]+1; bl = previous D[i,j]
        mov     bl, al                          ;

        dec     edx
        jnz     .loop2

; check the limit.
        mov     al, [.buff0]
        sub     al, cl
        jns     @f
        neg     al
@@:
        cmp     al, byte [.Limit]
        jae     .finish

        dec     cl
        jnz     .loop1

.finish:
        movzx   eax, al
        pop     edi esi edx ecx ebx
        return
endp









; returns the TText structure with the strings in EDX
; count of the strings in ECX

proc ReadDictionary, .hFileName
.line dd ?
.cnt  dd ?
begin
        pushad

        xor     eax, eax
        mov     [.cnt], eax

        stdcall TextCreate, sizeof.TText
        mov     edx, eax

        stdcall FileOpen, [.hFileName]
        mov     ebx, eax

.read_file:
        stdcall FileReadLine, ebx
        test    eax, eax
        jz      .end_of_file

        mov     [.line], eax

        stdcall StrLen, [.line]
        push    eax

        lea     eax, [eax + 32]
        stdcall TextSetGapSize, edx, eax

        pop     ecx

        mov     edi, [edx+TText.GapBegin]
        mov     dword [edx+edi], ecx

        lea     edi, [edx+edi+4]
        stdcall StrPtr, [.line]
        mov     esi, eax

        rep movsb

        xor     eax, eax
        stosd

        mov     ecx, edi
        add     ecx, 7
        and     ecx, $fffffff8
        sub     ecx, edi

        xor     eax, eax
        rep stosb

        sub     edi, edx
        mov     [edx+TText.GapBegin], edi

        stdcall StrDel, [.line]

        inc     [.cnt]
        jmp     .read_file


.end_of_file:
        stdcall FileClose, ebx

        mov     ecx, [.cnt]

        mov     [esp+4*regECX], ecx
        mov     [esp+4*regEDX], edx
        popad
        return
endp


; Returns:
;   EAX = minimal distance.
;   EBX = zero based index where the minimum were found.

proc ComputeMinDist, .pDict, .DictSize, .hString
begin
        pushad

        stdcall StrPtr, [.hString]
        mov     edi, eax

        mov     edx, [.pDict]

        mov     esi, 64
        xor     ebx, ebx
        dec     ebx

        xor     ecx, ecx

.loop:
        cmp     ecx, [.DictSize]
        jae     .finish

        add     edx, 4
        stdcall Levenshtein2, edx, [edx-4], edi, [edi+string.len], esi
        cmp     esi, eax
        cmovg   esi, eax
        cmovg   ebx, ecx

        add     edx, [edx-4]
        add     edx, 4 + 7
        and     edx, $fffffff8

        inc     ecx
        jmp     .loop

.finish:
        mov     [esp+4*regEAX], esi
        mov     [esp+4*regEBX], ebx
        popad
        return
endp
