Visual Basic for Applications/Compare Shuffle Methods for Bias

Summary

edit

This VBA code module shows how bias can affect a shuffle algorithm, even when a random generator is used. The module is intended for MS Excel and prints two statistics panels on the worksheet to show how the selected methods differ. The good version is the Knuth shuffle algorithm, and the other is one that makes methodical swaps, sometimes multiple swaps, throughout the length of the array.

Notes on the code

edit
  • Copy the entire code listing into an Excel Standard module, save it, then run the top procedure.
  • The shuffle algorithms run for many cycles, (user set), and the bin count of discrete outcomes is presented in statistics. Notice that a very small number of elements, (3 or 4), has been used to keep the display and run time manageable. It may be of interest to know that the number of discrete outcome combinations is equal to the factorial of the number of elements. That is; using just six elements, say A,B,C,D,E,and F, would result in 6! = 120 discrete combinations, and a corresponding increase in the time to run.
  • In particular, it should be noted that a small coefficient of variation (CV) denotes a method that is close to random whereas one with a higher CV shows that there is bias in the method.
  • The method that uses multiple swaps is biased, whereas the Knuth method is not.

VBA Code Module

edit
Option Explicit
Option Base 1

Private Sub RunShuffleBiasDemo()
    'Run this to compare the bias for shuffling
    'character arrays using two methods.
    'Note large sets are too time consuming,
    'since there are n! combinations.
    'THIS SUB CLEARS AND WRITES TO SHEET1
    
    Dim vArr As Variant, vRet As Variant
    Dim nT As Long, bOK As Boolean, vT As Variant
    Dim nCycles As Long, n As Long, sT As String
    
    'load a typical 1D data array with test data
    '3 elements makes 3! = 6 bins
    '4 elements needs 4! = 24 bins etc.
    vArr = Array("A", "B", "C")
    
    'set number of cycles for test
    'Typical time to complete 3 element test:
    '25 secs for 100000 cycles
    '225 secs for 1000000 cycles
    nCycles = 100000
    
        
    'dimension the collection array
    ReDim vT(1 To nCycles)
        
    'clear the worksheet
    ClearWorksheet "Sheet1", 3 'contents and formats
        
    'runs number of shuffle samples
    For n = 1 To nCycles
        'give way to commands-eg; break
        DoEvents
        
        'pass array to a random shuffling proc
        bOK = KnuthArrShuffle(vArr, vRet)
                
        'make a single string from array elements
        sT = Arr1DToStr(vRet)
        
        'save shuffle instance in an array
        vT(n) = sT
    Next n
       
    'pass array to the proc with label for the display
    CountUniqueArrayValues2 vT, 2, 2, "Test Set: Rnd Knuth"
    
    'run a number of shuffle samples
    For n = 1 To nCycles
        'give way to commands-eg; break
        DoEvents
        
        'pass array to a random shuffling proc
        bOK = BiasedMultiSwapArrShuffle(vArr, vRet)
        
        'make a single string from array elements
        sT = Arr1DToStr(vRet)
        
        'save shuffle instance in an array
        vT(n) = sT
    Next n
       
    'pass array to the proc with label for the display
    CountUniqueArrayValues2 vT, 2, 7, "Test Set: Rnd Biased?"

    'report end
    MsgBox "Display done."

End Sub

Private Sub CountUniqueArrayValues2(vI As Variant, Optional nRow As Long = 1, _
                                   Optional nCol As Long = 1, Optional sLabel As String = "")
    'Counts instances of data number values in vI. Generates various stats
    'for the bin quantities.
    'THIS SUB CLEARS AND WRITES TO SHEET1
    
    Dim vRV As Variant, vRQ As Variant, vDS As Variant
    Dim LB As Long, UB As Long, vDB As Variant
    Dim n As Long, bOK As Boolean
    
    'make bins and count contents
    bOK = DiscreteItemsCount(vI, vRV, vRQ)
    
    LB = LBound(vRV, 1): UB = UBound(vRV, 1)
    ReDim vDS(1 To 12, 1 To 3)
    ReDim vDB(LB To UB + 2, 1 To 3)
    
    If bOK Then 'load bins and stats arrays
        vDB(1, 1) = sLabel: vDB(1, 2) = "Value": vDB(1, 3) = "Quantity"
        For n = LB To UB
             DoEvents
             vDB(n + 2, 1) = "Bin # " & n 'headings
             vDB(n + 2, 2) = vRV(n)       'value
             vDB(n + 2, 3) = vRQ(n)       'quantity
        Next n
        
        On Error Resume Next 'avoids Mode() error when no value stands out
        With Application.WorksheetFunction
            vDS(1, 1) = sLabel: vDS(1, 2) = "": vDS(1, 3) = "Quantity"
            vDS(3, 1) = "Average": vDS(3, 3) = Format(.Average(vRQ), "#0.000")
            vDS(4, 1) = "Median": vDS(4, 3) = .Median(vRQ)
            vDS(5, 1) = "Mode": vDS(5, 3) = .Mode(vRQ)
            vDS(6, 1) = "Minimum": vDS(6, 3) = .Min(vRQ)
            vDS(7, 1) = "Maximum": vDS(7, 3) = .Max(vRQ)
            vDS(8, 1) = "Std.Deviation": vDS(8, 3) = Format(.StDevP(vRQ), "#0.000")
            vDS(9, 1) = "StDev/AvĀ %": vDS(9, 3) = Format(.StDevP(vRQ) * 100 / .Average(vRQ), "#0.000")
            vDS(10, 1) = "Variance": vDS(10, 3) = Format(.VarP(vRQ), "#0.000")
            vDS(11, 1) = "No.Unique Values": vDS(11, 3) = UBound(vRQ) - LBound(vRQ) + 1
            vDS(12, 1) = "No.Samples": vDS(12, 3) = UBound(vI) - LBound(vI) + 1
        End With
        Err.Clear
    Else
        MsgBox "Problems getting bin count - closing"
        Exit Sub
    End If
    
    'output to sheet
    'ClearWorksheet "Sheet1", 3                    'clear both contents and formats of the worksheet
    Array2DToSheet vDS, "Sheet1", nRow, nCol      'transfer stats panel to sheet with top left at row2, col2
    Array2DToSheet vDB, "Sheet1", nRow + 13, nCol 'transfer bins panel to sheet with top left below stats
    FormatCells "Sheet1"                          'apply font and autofit formats to all cells of the worksheet

End Sub

Private Function DiscreteItemsCount(vIn As Variant, vRetV As Variant, vRetQ As Variant) As Boolean
    'Counts number of repeats of element values found in vIn
    'Returns with one column for each unitque value and quantity found.
    'Returns as 2D vRet, unsorted; row1=input value, row2=item count.

    Dim vA As Variant, vTS As Variant, vTB As Variant
    Dim s As Long, b As Long, n As Long, bFound As Boolean
    Dim LB1 As Long, UB1 As Long, LB2 As Long, UB2 As Long
    Dim LBS As Long, UBS As Long
    
    'dimension 2D work array
    ReDim vA(1 To 2, 1 To 1)
    
    'get source 1D array bounds
    LBS = LBound(vIn): UBS = UBound(vIn)
    
    'get work array bounds
    LB1 = LBound(vA, 1): UB1 = UBound(vA, 1) 'd1 rows
    LB2 = LBound(vA, 2): UB2 = UBound(vA, 2) 'd2 cols
    
    'intitial values
    s = LBS
    b = 0
    vA(2, 1) = 0
    
    Do 'step through store
        DoEvents
        'get source element value
        vTS = vIn(s)
        'check bins
        Do
            DoEvents
            b = b + 1
            'get bin element value
            vTB = vA(1, b)
            If vTS = vTB Then 'found in bins
                vA(2, b) = CLng(vA(2, b)) + 1 'update bin
                bFound = True
            End If
        Loop Until b >= UB2 Or bFound = True
        
        If bFound = False Then 'no such bin exists yet
            'not found in bins
            If vA(2, UB2) <> 0 Then 'first element been used
                ReDim Preserve vA(LB1 To UB1, LB2 To UB2 + 1)
                UB2 = UBound(vA, 2)
            End If
            'update new bin
            vA(1, UB2) = vTS
            vA(2, UB2) = 1
            bFound = True
        End If
        
        'reset loop variables
        bFound = False
        b = 0
        s = s + 1
    Loop Until s > UBS

    'transfers -need to be separate for other uses
    LB1 = LBound(vA, 1): UB1 = UBound(vA, 1) 'd1 rows
    LB2 = LBound(vA, 2): UB2 = UBound(vA, 2) 'd2 cols
    ReDim vRetV(LB2 To UB2) 'contains values
    ReDim vRetQ(LB2 To UB2) 'contains quantities
    
    For n = LB2 To UB2
        vRetV(n) = vA(1, n)
        vRetQ(n) = vA(2, n)
    Next n
        
    For n = LB2 To UB2
        Debug.Print vRetV(n) & vbTab & vRetQ(n)
    Next n
        Debug.Print vbCrLf
   
   DiscreteItemsCount = True

End Function

Private Sub ClearWorksheet(ByVal sSheet As String, Optional ByVal nOpt As Integer = 1)
   'clears worksheet contents, formats, or both
   'but does not remove charts from the worksheet
   'nOpt options: contents=1, formats=2, all=3
      
   Dim oWSht As Worksheet
   Set oWSht = ThisWorkbook.Worksheets(sSheet)
   oWSht.Activate
   
   With oWSht.Cells
    Select Case nOpt
        Case 1 'contents only
            .ClearContents
        Case 2 'formats only
            .ClearFormats
        Case 3 'formats and contents
            .Clear
    Case Else
        MsgBox "Illegal option in ClearWorksheet - closing"
        Exit Sub
    End Select
   End With
   
   oWSht.Cells(1, 1).Select

End Sub

Private Sub Array2DToSheet(ByVal vIn As Variant, sShtName As String, nStartRow As Long, nStartCol As Long)
    ' transfers contents of input 2D array to specified worksheet positions
    ' Works for any array bounds
    
    Dim oSht As Worksheet, rTarget As Range
    Dim nRows As Long, nCols As Long
    Dim nNewEndC As Long, nNewEndR As Long
    
    'get reference to sheet for output
    Set oSht = ActiveWorkbook.Worksheets(sShtName)

    'get the pre-shift end points
    nRows = UBound(vIn, 1) - LBound(vIn, 1) + 1
    nCols = UBound(vIn, 2) - LBound(vIn, 2) + 1
    
    'modify end point for parameter starting values
    nNewEndR = nRows + nStartRow - 1
    nNewEndC = nCols + nStartCol - 1
       
    ' define the sheet range for the array contents
    Set rTarget = oSht.Range(oSht.Cells(nStartRow, nStartCol), oSht.Cells(nNewEndR, nNewEndC))
    
    'transfer the array contents to the sheet range
    rTarget.Value = vIn

End Sub

Private Sub FormatCells(sSht As String)
    ' Applies certain formats to all cells
    ' of the named parameter worksheet
    
    Dim oSht As Worksheet
    
    Set oSht = ThisWorkbook.Worksheets(sSht)
    oSht.Activate
    
    'format all cells of the worksheet
    oSht.Cells.Select
    With Selection
        .Font.Name = "Consolas" 'mono
        .Font.Size = 14
        .Columns.AutoFit
        .Rows.AutoFit
        .HorizontalAlignment = xlLeft 'xlRight 'xlCenter
        .VerticalAlignment = xlBottom 'xlCenter 'xlTop
    End With
    oSht.Range("A1").Select

End Sub

Private Function KnuthArrShuffle(vIn As Variant, vR As Variant) As Boolean
    ' Performs a modified Knuth random shuffle on the elements of the input array.
    ' The original by Fisher-Yates, was modified for computers by Durstenfeld
    ' then popularised by Knuth. Returns result in vR with vIn unchanged.
       
    'Note: Some or all elements COULD occupy their original positions,
    'but only in accordance with expectation based on the random generator.
    'This is best seen for small arrays, say with only 3 elements or so.
        
    Dim vW As Variant
    Dim LB As Long, UB As Long, nL As Long
    Dim i As Long, j As Long
    
    'initial assignments
    LB = LBound(vIn): UB = UBound(vIn)
    ReDim vR(LB To UB) 'return array
    ReDim vW(LB To UB) 'work array
    nL = UB - LB + 1   'length of input array
    vW = vIn 'transfer to a work array
            
    'working
    Randomize
    For i = LB To nL
        'first get a random number
        j = Int((UB - LB + 1) * Rnd + LB)
            
        'transfer jth of vW to ith of vR
        vR(i) = vW(j)
        
        'replace selection with current last of vW
        vW(j) = vW(UB)
        
        'remove last of vW by shortening array
        ReDim Preserve vW(LB To UB - 1)
        
        'get new UBound of shortened vW
        UB = UBound(vW)
        
        'exception; return if last chara
        If UB = LB Then
            vR(i + 1) = vW(UB)
            Exit For
        End If
                
        DoEvents 'allow breaks
    Next i
        
    KnuthArrShuffle = True

End Function

Private Function BiasedMultiSwapArrShuffle(vIn As Variant, Optional vRet As Variant) As Boolean
    'Performs a random shuffle on input array strings.
    'Input parameter is a single array.  Returns in single vRet
    'if provided, else in vIn modified.  Multiple shuffles applied.
    'Displays more bias than the Knuth method.
    
    Dim vR As Variant
    Dim sT As String, sTJ As String, sTS As String, nC As Long
    Dim LB1 As Long, UB1 As Long, LB2 As Long, UB2 As Long
    Dim i As Integer, j As Long, bSUsed As Boolean, bRUsed As Boolean

    'get dimensions of vIn
    LB1 = LBound(vIn): UB1 = UBound(vIn)

    'dimension a work array
    ReDim vR(LB1 To UB1)

    'load local array
    For i = LB1 To UB1
        DoEvents
        vR(i) = vIn(i)
    Next i

    'get dimensions of vR
    LB2 = LBound(vR): UB2 = UBound(vR)

    'randomize the rnd generator
    Randomize

    For i = LB2 To UB2
        
        DoEvents
        'get rnd number
        j = Int((UB2 - LB2 + 1) * Rnd + LB2)

        'exchange elements
        sT = vR(i) 'swap
        vR(i) = vR(j)
        vR(j) = sT
    Next i

    'transfers
    If Not IsMissing(vRet) Then
        ReDim vRet(LB2 To UB2)
        For i = LB2 To UB2
            DoEvents
            vRet(i) = vR(i)
        Next i
    Else
        For i = LB2 To UB2
            DoEvents
            vIn(i) = vR(i)
        Next i
    End If

    'return status
    BiasedMultiSwapArrShuffle = True

End Function

Private Function Arr1DToStr(vIn As Variant) As String
    ' Makes a single string from 1D array string elements.
    ' Works for any array bounds.

    Dim nC As Long, sT As String, sAccum As String
    Dim LB As Long, UB As Long

    LB = LBound(vIn): UB = UBound(vIn)

    'join characters of array into string
    For nC = LB To UB
        DoEvents
        sT = vIn(nC)
        sAccum = sAccum & sT
    Next

    Arr1DToStr = sAccum

End Function

See Also

edit