Attribute VB_Name = "modConvertDictionary"
' ---------------------------------------------------------------------
'modConvertDictionary: Utility to convert a standard dictionary file
'  to the format used by this spell checker.  Note: This is a stand
'  alone function, and it's included in this project only for
'  reference.  It's been disabled by the module constant.  If you
'  should try to run it, be warned: It's SLOW.
'
'  Created: 2000-07-11 by Scott Seligman <scott@scottandmichelle.net>
' ---------------------------------------------------------------------


'Constant to disable this module, just so it doesn't get in the way
#Const ConvertDictionary = False

#If ConvertDictionary Then
Option Explicit

Private Declare Function GetCurrentTime Lib "kernel32" Alias _
   "GetTickCount" () As Long

Private m_cCompress As Collection

Private Type DictPointerType
   TotalWords As Integer
   Block As Byte
   Offset As Integer
   WordLength As Integer
End Type

Private Type DictCompressType
   RealSize As Byte
   Letter(1 To 5) As Byte
End Type

Private m_sCompress(1 To 255) As String

Public Sub ConvertDictionary()

   Dim sInFile As String
   Dim sOutFile As String
   
   sInFile = App.Path & "\Original Dictionary.txt"
   sOutFile = App.Path & "\SpellWrd.dat"
   
   If Dir(sOutFile) <> "" Then
      Kill sOutFile
   End If
   
   Set m_cCompress = New Collection
   
   Dim cWords As Collection
   Dim cTemp As Collection
   Dim nFile As Long
   Dim sWord As String
   Dim sSoundex As String
   Dim nLast As Long
   Dim nCur As Long
   Dim Dummy As clsSpellWords
   Set Dummy = New clsSpellWords
   
   nFile = FreeFile
   Open sInFile For Input As #nFile
   nLast = GetCurrentTime
   Set cWords = New Collection
   
   Debug.Print "-- Loading dictionary"
   
   Do While Not EOF(nFile)
   
      Line Input #nFile, sWord
      sWord = LCase(sWord)
      
      If Asc(Mid(sWord, 1, 1)) >= 97 And _
         Asc(Mid(sWord, 1, 1)) <= 122 And Len(sWord) >= 2 Then
      
         ProcessWord Mid(sWord, 2)
      
         sSoundex = Dummy.Soundex(sWord)
         sWord = Mid(sWord, 2)
         Set cTemp = Nothing
         
         On Error Resume Next
         Set cTemp = cWords.Item(sSoundex)
         Err.Clear
         On Error GoTo 0
         
         If cTemp Is Nothing Then
            Set cTemp = New Collection
            cTemp.Add sSoundex
            cWords.Add cTemp, sSoundex
         End If
         
         cTemp.Add sWord
         
         nCur = GetCurrentTime
         If nCur - nLast >= 1000 Then
            nLast = nLast + 1000
            Debug.Print Mid(sSoundex, 1, 1) & sWord
         End If
         
      End If
   Loop
   
   Debug.Print "-- Done loading dictionary, starting to create blocks"
   Close

   Dim Pointers(1 To 26, 0 To 6, 0 To 6, 0 To 6) As DictPointerType
   Dim cBlocks As Collection
   Dim obj As clsSpellBlock
   Dim bFirst As Boolean
   Dim vWord As Variant
   Dim sComplete As String
   Dim nCurPos As Long
   Dim i As Long
   
   Set cBlocks = New Collection
   
   nCurPos = 1
   Set obj = New clsSpellBlock
   
   Dim cMaxWords As Collection
   
   PickTopCompress
   PrepareTopCompress
   
   For Each cTemp In cWords
   
      bFirst = True
      sComplete = ""
      
      For Each vWord In cTemp
         If bFirst Then
            bFirst = False
            sSoundex = vWord
         Else
            sComplete = sComplete & CompressWord(vWord) & Chr(0)
         End If
      Next
      
      If nCurPos + Len(sComplete) > 32767 Then
         cBlocks.Add obj
         Set obj = Nothing
         Set obj = New clsSpellBlock
         Debug.Print "block number " & Format(cBlocks.Count) & _
            " added (" & Format(32767 - nCurPos, "#,##0") & _
            " bytes of slack)"
         nCurPos = 1
      End If
      
      For i = 1 To Len(sComplete)
         obj.Block(nCurPos + i - 1) = Asc(Mid(sComplete, i, 1))
      Next
      
      Dim nMaxBlock As Long
      Dim nMaxOffset As Long
      Dim nMaxTotalWords As Long
      Dim nMaxWordLength As Long
      
      With Pointers(Asc(Mid(sSoundex, 1, 1)) - 64, _
         Val(Mid(sSoundex, 2, 1)), Val(Mid(sSoundex, 3, 1)), _
         Val(Mid(sSoundex, 4, 1)))
         
         .Block = cBlocks.Count + 1
         .Offset = nCurPos
         .TotalWords = cTemp.Count - 1
         .WordLength = Len(sComplete)
         
         If .Block > nMaxBlock Then nMaxBlock = .Block
         
         If .Offset > nMaxOffset Then nMaxOffset = .Offset
         
         If .TotalWords > nMaxTotalWords Then
            nMaxTotalWords = .TotalWords
            Set cMaxWords = cTemp
         End If
         
         If .WordLength > nMaxWordLength Then
            nMaxWordLength = .WordLength
         End If
         
      End With
      nCurPos = nCurPos + Len(sComplete)
   Next
   
   Debug.Print "Max Block: " & Format(nMaxBlock, "#,##0")
   Debug.Print "Max Offset: " & Format(nMaxOffset, "#,##0")
   Debug.Print "Max Total Words: " & Format(nMaxTotalWords, "#,##0")
   Debug.Print "Max Word Length: " & Format(nMaxWordLength, "#,##0")
   Debug.Print "Sample from Max Words: " & cMaxWords.Item(1) & _
      ", " & cMaxWords.Item(2) & ", " & cMaxWords.Item(3) & _
      ", " & cMaxWords.Item(4)
   
   cBlocks.Add obj
   Debug.Print "block number " & Format(cBlocks.Count) & " added (" _
      & Format(32767 - nCurPos, "#,##0") & " bytes of slack)"
   
   Debug.Print "-- Completed filling in the blocks, " & _
               "writing output file now"
   
   Dim nBlocks As Long
   Dim nTheBlock(1 To 32767) As Byte
   Dim Compressed(1 To 255) As DictCompressType
   Dim j As Long
   For i = 1 To 255
      Compressed(i).RealSize = Len(m_sCompress(i))
      For j = 1 To Len(m_sCompress(i))
         Compressed(i).Letter(j) = Asc(Mid(m_sCompress(i), j, 1))
      Next
   Next
   
   nBlocks = cBlocks.Count
   
   nFile = FreeFile
   Open sOutFile For Binary As #nFile
   Put #nFile, , nBlocks
   Put #nFile, , Pointers()
   Put #nFile, , Compressed()
   
   Debug.Print "Wrote header"
   
   nBlocks = 1
   For Each obj In cBlocks
      For i = 1 To 32767
         nTheBlock(i) = obj.Block(i)
      Next
      Debug.Print "Wrote block number " & Format(nBlocks) & "."
      Put #nFile, , nTheBlock()
      nBlocks = nBlocks + 1
   Next
   
   Close #nFile

   Debug.Print "-- All Done"

End Sub

Private Function CompressWord(ByVal sIn As String) As String

   Dim i As Long
   For i = 255 To 1 Step -1
      sIn = Replace(sIn, m_sCompress(i), Format(i, "000"))
   Next
   Do Until sIn = ""
      CompressWord = CompressWord & Chr(Val(Mid(sIn, 1, 3)))
      sIn = Mid(sIn, 4)
   Loop

End Function

Private Sub PrepareTopCompress()

   Dim i As Long
   For i = 1 To 26
      m_sCompress(i) = Chr(96 + i)
   Next
   m_sCompress(27) = "'"
   
   i = 28
   Dim vItem As Variant
   Dim nMax As Long
   For Each vItem In m_cCompress
      If Len(Mid(vItem, 17)) > nMax Then
         nMax = Len(Mid(vItem, 17))
      End If
      m_sCompress(i) = Mid(vItem, 17)
      i = i + 1
   Next
   
   Debug.Print "Max word length: " & nMax

End Sub

Private Sub ProcessWord(sWord As String)

   Dim sTemp As String
   Dim sPart As String

   Dim nLen As Long
   Dim nOff As Long
   
   For nLen = 2 To Len(sWord)
      For nOff = 0 To Len(sWord) - nLen
         sPart = Mid(sWord, nOff + 1, nLen)
         On Error Resume Next
         sTemp = ""
         sTemp = m_cCompress("str" & sPart)
         Err.Clear
         On Error GoTo 0
         If sTemp = "" Then
            sTemp = Format(1, String(16, "0")) & sPart
         Else
            sTemp = Format(Val(Mid(sTemp, 1, 16)) + 1, _
                    String(16, "0")) & sPart
            m_cCompress.Remove "str" & sPart
         End If
         m_cCompress.Add sTemp, "str" & sPart
      Next
   Next

End Sub

Private Sub PickTopCompress()

   Debug.Print "-- Sorting out compressing logic"

   Sort m_cCompress
   
   Dim cOut As Collection
   Set cOut = New Collection
   Dim vItem As Variant
   Dim nCount As Long
   
   For Each vItem In m_cCompress
      nCount = nCount + 1
      cOut.Add vItem
      If nCount >= 228 Then
         Exit For
      End If
   Next
   
   Set m_cCompress = Nothing
   Set m_cCompress = cOut
   Set cOut = Nothing

End Sub

Private Sub Sort(col As Collection)

   If col.Count <= 1 Then
      Exit Sub
   End If

   Dim cMin As Collection
   Dim cMax As Collection
   Dim cMid As Collection
   
   Dim nMid As Long
   
   Set cMin = New Collection
   Set cMax = New Collection
   Set cMid = New Collection
   
   nMid = Val(Mid(col.Item(1), 1, 16) * Len(col.Item(1)) - 17)
   cMid.Add col.Item(1)
   col.Remove 1
   
   Dim nItem As Long
   Do While col.Count > 0
      nItem = Val(Mid(col.Item(1), 1, 16) * Len(col.Item(1)) - 17)
      If nItem < nMid Then
         cMax.Add col.Item(1)
      ElseIf nItem > nMid Then
         cMin.Add col.Item(1)
      Else
         cMid.Add col.Item(1)
      End If
      col.Remove 1
   Loop

   Sort cMin
   Sort cMax
   
   Do While cMin.Count > 0
      col.Add cMin.Item(1)
      cMin.Remove 1
   Loop

   Do While cMid.Count > 0
      col.Add cMid.Item(1)
      cMid.Remove 1
   Loop

   Do While cMax.Count > 0
      col.Add cMax.Item(1)
      cMax.Remove 1
   Loop

End Sub

#End If 'ConvertDictionary
