• Coding
  • [Exercise] Walk the grid!

Imagine a small mouse moving on an infinite grid. Each step, the mouse moves to an adjacent cell, with the following probabilities:
  • 20% chance that it goes North
  • 20% chance that it goes South
  • 30% chance that it goes East
  • 30% chance that it goes West
What is the probability that it will walk over any cell more than once after 13 moves?
Let's assume a 27x27 grid. If the root cell is situated at the center and we're only going to have 13 moves, then we aren't going to need a grid larger than that.

I have solved the problem and the results are pretty surprising to me.

Here's the code in C++:

Cell.h
#ifndef _CELL_H_
#define _CELL_H_

struct Cell {
    int data;
    Cell();
};

#endif
Cell.cpp
#include "Cell.h"

Cell::Cell() {
    data = 0;
}
Grid.h
#ifndef _GRID_H_
#define _GRID_H_

#include "Cell.h"
#include "Constants.h"

struct Grid {
    static Cell* cells;
    static void init();
    static void cleanUp();
};

#endif
Grid.cpp
#include "Grid.h"

Cell* Grid::cells;

void Grid::init() {
    cells = new Cell[SIZE * SIZE];
}

void Grid::cleanUp() {
    if (cells) {
        delete cells;
    }
}
Constants.h
#ifndef _CONSTANTS_H_
#define _CONSTANTS_H_

#define MOVES 13
#define SIZE 27

#endif
Mouse.h
#ifndef _MOUSE_H_
#define _MOUSE_H_

#include "Grid.h"
#include "Constants.h"

struct Mouse {
    int x;
    int y;
    Mouse();
    Cell* moveNorth();
    Cell* moveSouth();
    Cell* moveEast();
    Cell* moveWest();
    Cell* moveToCenter();
};

#endif
Mouse.cpp
#include "Mouse.h"

Mouse::Mouse() {
    x = -1;
    y = -1;
}

Cell* Mouse::moveNorth() {
    y++;
    Grid::cells[x * SIZE + y].data = Grid::cells[x * SIZE + y].data + 1;
    return &Grid::cells[x * SIZE + y];
}

Cell* Mouse::moveSouth() {
    y--;
    Grid::cells[x * SIZE + y].data = Grid::cells[x * SIZE + y].data + 1;
    return &Grid::cells[x * SIZE + y];
}

Cell* Mouse::moveEast() {
    x++;
    Grid::cells[x * SIZE + y].data = Grid::cells[x * SIZE + y].data + 1;
    return &Grid::cells[x * SIZE + y];
}

Cell* Mouse::moveWest() {
    x--;
    Grid::cells[x * SIZE + y].data = Grid::cells[x * SIZE + y].data + 1;
    return &Grid::cells[x * SIZE + y];
}
	
Cell* Mouse::moveToCenter() {
    x = 13;
    y = 13;
    Grid::cells[13 * SIZE + 13].data = Grid::cells[13 * SIZE + 13].data + 1;
    return &Grid::cells[13 * SIZE + 13];
}
main.cpp
#include <iostream>
#include <vector>

#include "Grid.h"
#include "Mouse.h"
#include "Constants.h"
#include "Cell.h"

using namespace std;

inline int GetRandomNumber(int min, int max) {
    return min + (rand() % (max - min + 1));
}

int main() {
    /*
    *   Cool way to seed the 
    *   pseudo-random number generator.
    *   It's cool because it doesn't 
    *   require you to include a time 
    *   library.
    */
    int k = 0;
    srand((int)&k);

    /*
    *   Variable declarations
    */
    Mouse mouse;
    vector<Cell*> cells;
    bool conditionMet = false;
    int instances = 0;

    /*
    *   Initialize the grid.
    */
    Grid::init();

    /*
    *   We're going to do the test 3 times.
    */
    int j = 0;
    while (j++ != 3) {
        /*
        *   We're testing for 1000 different 
        *   mouse movement instances.
        */
        for (int l = 0; l < 1000; l++) {
            /*
            *   We move our mouse to the center 
            *   of the grid first.
            */
            cells.push_back(mouse.moveToCenter());

            /*
            *   Now we allow the mouse to move 13 times 
            *   on the grid with the following probabilities:
            *
            *   - 20% North
            *   - 20% South
            *   - 30% East
            *   - 30% West
            */
            for (int i = 0; i < MOVES; i++) {
                k = GetRandomNumber(0, 99);

                if (k < 20) {
                    // We move north.
                    cells.push_back(mouse.moveNorth());
                } else if (k < 40) {
                    // We move south.
                    cells.push_back(mouse.moveSouth());
                } else if (k < 70) {
                    // We move east.
                    cells.push_back(mouse.moveEast());
                } else {
                    // We move west.
                    cells.push_back(mouse.moveWest());
                }
            }

            /*
            *   We loop over all the pushed cells to see 
            *   if any of them was encountered more than
            *   once. If a cell was encountered more than 
            *   once, this test is positive.
            */
            for (int i = 0; i < cells.size(); i++) {
                if (cells[i]->data > 1) {
                    conditionMet = true;
                }
                cells[i]->data = 0;
            }

            /*
            *   If the conditions were met, we increase 
            *   the integer representing the number of 
            *   positive tests and reset the conditionMet 
            *   boolean to it's original state.
            */
            if (conditionMet) {
                instances++;
                conditionMet = false;
            }

            /*
            *   Finally, we clear the vector.
            */
            cells.clear();
        }

        /*
        *   Now we display the number of postive tests so the user 
        *   wouldn't have to dump all of his RAM to a file and spend 
        *   weeks figuring out the results of the tests done. If you 
        *   do not care about the sanity of your users, please comment 
        *   out the following line.
        */
        cout << instances << "/1000" << endl;
        instances = 0;
    }

    /*
    *   Now we clean up those 2900 bytes allocated so time travelers 
    *   from 1978 don't complain about you taking up 18% of their RAM.
    */
    Grid::cleanUp();

    cin >> k;
    return 0;
}
Outputs:
First test:
-----------
992/1000
983/1000
993/1000

Second test:
------------
988/1000
985/1000
989/1000
98.83% of the time, the mouse is going to walk over the same cell more than once.
Good enough? :D
Here's my version in Python
def answer(X, probs):
  
    # This dict holds the probabilities given as arguments
    d = {"N": probs[0],
         "S": probs[1],
         "E": probs[2],
         "W": probs[3]}
  
    # A positive probability is the probability of a path that goes over
    # a sing cell twice occurring.
    # This generators yields all the possible ones
    positive_probabilities = (prob(x, d) for x in gen_paths(X) if twicep(x))
  
    s = 0
    for i in positive_probabilities:
        s += i
        
    return s

def prob_value(path, pvalue):
    s = 1
    for i in path:
        s *= pvalue[i]
    return s

twicep_cache = {'': [(0, 0)]}    
def is_twice(path):
    
    last, rest = path[-1], path[:-1]
    positions = twicep_cache[rest] # Retrieve previous data.
    
    current = positions[-1]
    if last == "N":
        a = positions + [(current[0], current[1]+1)]
    elif last == "S":
        a = positions + [(current[0], current[1]-1)]
    elif last == "E":
        a = positions + [(current[0]+1, current[1])]        
    elif last == "W":
        a = positions + [(current[0]-1, current[1])]        
  
    twicep_cache[path] = a
    
    # test if each element is unique
    return not len(a) == len(set(a))

  
def ptr_incr(ptr):
    """ appends in turn "N", "S", "W", "E" to each element
    that returns false for the is_twice() test.
    """
    
    if ptr == "":
        ptr = [""]
        
    for i in ptr:
        if i != "" and is_twice(i):
            yield i
        else:
            yield i + "N"
            yield i + "S"
            yield i + "W"
            yield i + "E"
I wrote an explanation of the code here. (I really should look into some better presentation for my blog).

I ran the code inside Python's C Profiler, like this:
import cProfile
import pstats
command = """reactor.run()"""
cProfile.runctx('print "value is %s" % prob_twice(10, [0.2, 0.2, 0.3, 0.3])',
                globals(),
                locals(),
                filename="grid.profile")
  
# load profile from disk
stats = pstats.Stats("grid.profile")
stats.print_stats()
And here's the result I get:
value is 0.988116008291
Sat Nov 24 22:01:25 2012    grid.profile

         11102675 function calls in 45.766 CPU seconds
   Random listing order was used

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.339    0.339   10.096   10.096 /tmp/py10922tQ1:16(gen_tree)
  2444068   41.276    0.000   41.725    0.000 /tmp/py10922tQ1:24(twicep)
  2444081    0.777    0.000    9.757    0.000 /tmp/py10922tQ1:2(ptr_incr)
   663193    0.868    0.000   35.457    0.000 /tmp/py10922tQ1:58()
        1    0.212    0.212   45.766   45.766 /tmp/py10922tQ1:52(prob_twice)
  4888136    0.450    0.000    0.450    0.000 {len}
   663192    1.844    0.000    1.844    0.000 /tmp/py10922tQ1:42(prob_value)
        1    0.000    0.000   45.766   45.766 :1()
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.000    0.000    0.000    0.000 {range}
@Magtheridon96: I hadn't thought of a statistical approach like this. I suggest you run a higher number of tests if you want a better number. It's up to you to define what you mean by "better answer".

However I feel that a statistical approach like this can only approximate the results, which is not the goal of this exercise.
I changed the number of tests to 30 instead of 3, so there are 30000 instances of the grid per execution.
I then executed my application 5 times and got the following output:
98.83%
98.8367%
98.8167%
98.8267%
98.7967%
It seems I'm very, very close to the actual value of 98.8116%.
The average is 98.82126%.

The margin of error is +0.009876%, which is pretty good, no? :v

I was going to use a script to brute-force the entire thing initially, but I changed my mind because my computer is terrible.

edit
I optimized it and did 5,000,000 instances only to achieve a value of 98.8159%, which reduces the margin of error to +0.00435%.
That's a nice solution.

A small warning though:
    /*
    *   Cool way to seed the 
    *   pseudo-random number generator.
    *   It's cool because it doesn't 
    *   require you to include a time 
    *   library.
    */
    int k = 0;
    srand((int)&k);
This didn't compile on my g++. Error message:
Main.cpp: In function ‘int main()’:
Main.cpp:26: error: cast from ‘int*’ to ‘int’ loses precision
It's probably not very portable.