-->

Create n by n matrix with unique values from 1:n

2020-08-22 07:49发布

问题:

I want generate a random n by n matrix in R with discrete values ranging from 1 to n. The tricky part is that I want each value to be unique both in the row and on the column.

For example, if n=3 the matrix could look like:

1 2 3 
2 3 1 
3 1 2 

or it could look like this:

2 3 1 
1 2 3 
3 1 2 

anyone has any idea of how to generate this kind of matrix?

回答1:

What you want is called a Latin square. Here's one function (from the Cookbook for R; see also here and a bunch of other search results online) allowing to generate them:

latinsquare <- function(len, reps=1, seed=NA, returnstrings=FALSE) {

    # Save the old random seed and use the new one, if present
    if (!is.na(seed)) {
        if (exists(".Random.seed"))  { saved.seed <- .Random.seed }
        else                         { saved.seed <- NA }
        set.seed(seed)
    }

    # This matrix will contain all the individual squares
    allsq <- matrix(nrow=reps*len, ncol=len)

    # Store a string id of each square if requested
    if (returnstrings) {  squareid <- vector(mode = "character", length = reps) }

    # Get a random element from a vector (the built-in sample function annoyingly
    #   has different behavior if there's only one element in x)
    sample1 <- function(x) {
        if (length(x)==1) { return(x) }
        else              { return(sample(x,1)) }
    }

    # Generate each of n individual squares
    for (n in 1:reps) {

        # Generate an empty square
        sq <- matrix(nrow=len, ncol=len) 

        # If we fill the square sequentially from top left, some latin squares
        # are more probable than others.  So we have to do it random order,
        # all over the square.
        # The rough procedure is:
        # - randomly select a cell that is currently NA (call it the target cell)
        # - find all the NA cells sharing the same row or column as the target
        # - fill the target cell
        # - fill the other cells sharing the row/col
        # - If it ever is impossible to fill a cell because all the numbers
        #    are already used, then quit and start over with a new square.
        # In short, it picks a random empty cell, fills it, then fills in the 
        # other empty cells in the "cross" in random order. If we went totally randomly
        # (without the cross), the failure rate is much higher.
        while (any(is.na(sq))) {

            # Pick a random cell which is currently NA
            k <- sample1(which(is.na(sq)))

            i <- (k-1) %% len +1       # Get the row num
            j <- floor((k-1) / len) +1 # Get the col num

            # Find the other NA cells in the "cross" centered at i,j
            sqrow <- sq[i,]
            sqcol <- sq[,j]

            # A matrix of coordinates of all the NA cells in the cross
            openCell <-rbind( cbind(which(is.na(sqcol)), j),
                              cbind(i, which(is.na(sqrow))))
            # Randomize fill order
            openCell <- openCell[sample(nrow(openCell)),]

            # Put center cell at top of list, so that it gets filled first
            openCell <- rbind(c(i,j), openCell)
            # There will now be three entries for the center cell, so remove duplicated entries
            # Need to make sure it's a matrix -- otherwise, if there's just 
            # one row, it turns into a vector, which causes problems
            openCell <- matrix(openCell[!duplicated(openCell),], ncol=2)

            # Fill in the center of the cross, then the other open spaces in the cross
            for (c in 1:nrow(openCell)) {
                # The current cell to fill
                ci <- openCell[c,1]
                cj <- openCell[c,2]
                # Get the numbers that are unused in the "cross" centered on i,j
                freeNum <- which(!(1:len %in% c(sq[ci,], sq[,cj])))

                # Fill in this location on the square
                if (length(freeNum)>0) { sq[ci,cj] <- sample1(freeNum) }
                else  {
                    # Failed attempt - no available numbers
                    # Re-generate empty square
                    sq <- matrix(nrow=len, ncol=len)

                    # Break out of loop
                    break;
                }
            }
        }

        # Store the individual square into the matrix containing all squares
        allsqrows <- ((n-1)*len) + 1:len
        allsq[allsqrows,] <- sq

        # Store a string representation of the square if requested. Each unique
        # square has a unique string.
        if (returnstrings) { squareid[n] <- paste(sq, collapse="") }

    }

    # Restore the old random seed, if present
    if (!is.na(seed) && !is.na(saved.seed)) { .Random.seed <- saved.seed }

    if (returnstrings) { return(squareid) }
    else               { return(allsq) }
}


回答2:

mats is a list of such matrices. It uses r2dtable to generate N random n x n matrices whose elements are chosen from 0, 1, ..., n-1 and whose margins are each given by margin. Then it filters out those for which all columns columns have one each of 0:(n-1) and adds one to each matrix to give result. The number of matrices returned can vary and you have to generate a huge number of matrices N to get just a few as n gets larger. When I tried n <- 3 below mats was a list of 24 matrices out of 100 but with n <- 4 it only found 1 out of 100.

set.seed(123)
N <- 100 # no of tries
n <- 3 # rows of matrix (= # cols)

check <- function(x) all(apply(x, 2, sort) == seq_len(nrow(x))-1)
margin <- sum(seq_len(n))-n
margins <- rep(margin, n)
L <- r2dtable(N, r = margins, c = margins)
mats <- lapply(Filter(check, L), "+", 1)


回答3:

Here is an attempt:

x <- c(1,2,3)
out <- NULL
for(i in 1:3){
  y <- c(x[1 + (i+0) %% 3], x[1 + (i+1) %% 3], x[1 + (i+2) %% 3])
  out <- rbind(out,y)
}

This gives:

> out
  [,1] [,2] [,3]
y    2    3    1
y    3    1    2
y    1    2    3

For the general case:

n <- 4
x <- 1:n
out <- NULL
for(i in 1:n){
  y <- x[1 + ((i+0:(n-1))%%n)]
  out <- rbind(out,y)
}

If I'm not wrong this is the expected result:

> out
  [,1] [,2] [,3] [,4]
y    2    3    4    1
y    3    4    1    2
y    4    1    2    3
y    1    2    3    4

Shorter:

n < 4 
x <- 1:n
vapply(x, function(i) x[1 + ((i+0:(n-1))%%n)], numeric(n))


回答4:

Here is one version that generates all possible rows for such matrix and then takes them one by one, restricting the selection to the valid choices each time:

n <- 9
allrows <- combinat::permn(n)

takerows <- function(taken, all) {
  available <- rep(TRUE, length(all))
  for(i in 1:nrow(taken)) {
    available <- sapply(all, function(x) all((x-taken[i,])!=0)) & available
  }
  matrix(all[[which(available)[sample(sum(available), 1)]]], nrow=1)
}

magicMat <- takerows(matrix(rep(0, n), ncol=n), allrows)
for(i in 1:(n-1)) {
  magicMat <- rbind(magicMat, takerows(magicMat, allrows))
}


> magicMat
      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9]
 [1,]    5    3    1    4    2    8    6    7    9
 [2,]    9    8    6    2    1    3    7    4    5
 [3,]    4    5    7    8    9    2    3    6    1
 [4,]    3    9    2    1    6    7    5    8    4
 [5,]    1    6    5    3    8    4    2    9    7
 [6,]    7    2    4    9    3    5    8    1    6
 [7,]    6    4    8    5    7    1    9    3    2
 [8,]    8    1    9    7    5    6    4    2    3
 [9,]    2    7    3    6    4    9    1    5    8


标签: r matrix