#' Convert a HapMap-style genotype table to a numeric dosage matrix
#'
#' Every sample call is recoded as the dose of the marker's first-listed
#' allele (the allele before the first "/" in the genotype column):
#' \itemize{
#'   \item 2 - the call equals the marker's first-listed allele;
#'   \item 1 - the call is a two-base IUPAC ambiguity code
#'     (M, R, W, S, Y, K), i.e. a heterozygote;
#'   \item 0 - the call is any other allele that appears in the genotype
#'     column (homozygous for a non-first allele);
#'   \item NA - the call is "N", missing, or not recognised.
#' }
#' Unrecognised calls trigger a warning, as they end up as `NA`.
#'
#' @param x A data frame with one row per marker. Its first column supplies
#'   the marker names used as column names of the result.
#' @param gt.col Index of the column holding the marker alleles as
#'   "A/G"-style strings. Character and factor columns are both accepted.
#' @param first.col,last.col Indices of the first and last sample columns.
#' @param verbose If `TRUE`, report the detected first-listed alleles via
#'   `message()`. Defaults to `FALSE`.
#' @return A numeric matrix with one row per sample column (row names are
#'   the sample column names) and one column per marker.
hapmap2df <- function(x, gt.col = 2, first.col = 12, last.col = ncol(x),
                      verbose = FALSE) {
  if (!is.data.frame(x)) {
    stop("`x` must be a data frame.", call. = FALSE)
  }
  is_index <- function(i) {
    is.numeric(i) && length(i) == 1L && !is.na(i) &&
      i == round(i) && i >= 1 && i <= ncol(x)
  }
  if (!is_index(gt.col) || !is_index(first.col) || !is_index(last.col)) {
    stop(
      "`gt.col`, `first.col` and `last.col` must be single column indices ",
      "within `x`.",
      call. = FALSE
    )
  }
  if (first.col > last.col) {
    stop("`first.col` must not be greater than `last.col`.", call. = FALSE)
  }
  sample_cols <- seq.int(first.col, last.col)

  # IUPAC two-base ambiguity codes; the names record the allele pair each
  # code stands for (documentation only, they are not used for matching).
  het_codes <- c(
    "A/C" = "M", "A/G" = "R", "A/T" = "W",
    "C/G" = "S", "C/T" = "Y", "G/T" = "K"
  )

  # Work on character values so factor and character inputs behave the same.
  genotypes <- as.character(x[[gt.col]])
  allele_list <- strsplit(genotypes, "/", fixed = TRUE)
  # First-listed allele per marker; robust to genotypes that do not split
  # into exactly two alleles.
  major <- vapply(
    allele_list,
    function(a) if (length(a) > 0L) a[[1L]] else NA_character_,
    character(1)
  )
  # Every allele named in the genotype column. NA must be dropped because
  # `%in%` would otherwise match missing calls against it.
  known_alleles <- unique(unlist(allele_list, use.names = FALSE))
  known_alleles <- known_alleles[!is.na(known_alleles)]

  if (isTRUE(verbose)) {
    message("Major alleles: ", paste(unique(major), collapse = ", "))
  }

  calls <- x[sample_cols]
  calls[] <- lapply(calls, as.character)
  calls <- as.matrix(calls)

  # Start from NA ("N", missing and unrecognised calls stay NA) and fill in
  # from lowest to highest priority, so a later rule overrides an earlier one.
  dosage <- matrix(NA_real_, nrow = nrow(calls), ncol = ncol(calls))
  dosage[calls %in% het_codes] <- 1
  dosage[calls %in% known_alleles] <- 0
  # `major` has one entry per row and is recycled down each column, giving a
  # row-wise comparison; which() discards comparisons that are NA.
  dosage[which(calls == major)] <- 2

  unrecognised <- is.na(dosage) & !is.na(calls) & calls != "N"
  if (any(unrecognised)) {
    warning(
      sprintf("%d unrecognised genotype call(s) set to NA.", sum(unrecognised)),
      call. = FALSE
    )
  }

  # Transposing an explicit matrix keeps two dimensions even when there is a
  # single sample column or a single marker.
  out_matrix <- t(dosage)
  dimnames(out_matrix) <- list(names(x)[sample_cols], as.character(x[[1L]]))
  out_matrix
}