#####################
# Normal Processing #
#####################

# Install the gender package only when it is missing, so that re-running the
# script does not reinstall it every time.
# NOTE - if asked to install GenderData click 1 for yes.
if (!requireNamespace("gender", quietly = TRUE)) {
  install.packages("gender")
}

# load package
library(gender)

# Import CSV with list of first names - set path
firstnames <- read.csv(
  "D:\\Dropbox\\Data Mining\\R\\Top 1000 names.csv",
  stringsAsFactors = FALSE
)

# If you have a short list or don't have a CSV to try, you can build your own
# list - simply uncomment. The names are wrapped in a data frame so that the
# firstnames[, 1] lookup below works exactly as it does for the CSV.
# firstnames <- data.frame(
#   name = c("Elizabeth", "Mary", "Jeff", "John", "Morgan", "Helen", "Tim",
#            "Diane", "Patricia"),
#   stringsAsFactors = FALSE
# )

# The first column of the input holds the names to look up.
values <- as.vector(firstnames[, 1])
if (length(values) == 0) {
  stop("No first names found in the input.", call. = FALSE)
}

# Create gender search function
#
# n: a single first name.
# Returns the cbind() of the name and the `gender` column of the gender()
# lookup (method "ssa", years 1900-1990). When the lookup yields no gender
# value the result holds only the name; it is padded with NA further below.
# NOTE(review): presumably gender() returns zero rows for unknown names -
# confirm against the gender package documentation.
workerFunc <- function(n) {
  cbind(n, gender(n, method = "ssa", years = c(1900, 1990))$gender)
}

# Start process and track processing time. The timestamps are printed
# explicitly so they also show up when the script is run via source().
start_time <- Sys.time()
print(start_time)
res <- lapply(values, workerFunc)
end_time <- Sys.time()
print(end_time)
print(end_time - start_time)

# Put final results together in data frame.
# Every per-name result is padded with NA to a common length so the pieces can
# be row-bound. The length is at least 2 so that the "name"/"gender" columns
# exist even when no name received a gender.
indx <- vapply(res, length, integer(1))
results <- as.data.frame(
  do.call(rbind, lapply(res, `length<-`, max(indx, 2L)))
)
colnames(results) <- c("name", "gender")

# Optional write results to CSV - set path
write.csv(results, "D:/Dropbox/Data Mining/R/Top 1000 names with gender.csv")

#######################
# Parallel Processing #
#######################

# Install the gender package (only when it is missing).
# NOTE - if asked to install GenderData click 1 for yes.
if (!requireNamespace("gender", quietly = TRUE)) {
  install.packages("gender")
}
# 'parallel' ships with R itself and cannot be installed from CRAN, so only
# doParallel needs installing (and only when it is missing).
if (!requireNamespace("doParallel", quietly = TRUE)) {
  install.packages("doParallel")
}

# load packages
library(gender)
library(parallel)
library(doParallel)

# Detect Cores and Register
# detectCores() may return NA when the core count is unknown; fall back to a
# single worker in that case because makeCluster() needs a concrete number.
n_cores <- detectCores()
if (is.na(n_cores)) {
  n_cores <- 1L
}
cl <- makeCluster(n_cores)
setDefaultCluster(cl)
registerDoParallel(cl, cores = n_cores)

# Attach the gender package on every worker so that workerFunc can call
# gender() there. (Evaluating the bare string "gender" on the workers, as was
# done before, loads nothing.)
clusterEvalQ(cl, library(gender))

# Import CSV with list of first names - set path
firstnames <- read.csv(
  "D:\\Dropbox\\Data Mining\\R\\Top 1000 names.csv",
  stringsAsFactors = FALSE
)

# If you have a short list or don't have a CSV to try, you can build your own
# list - simply uncomment. The names are wrapped in a data frame so that the
# firstnames[, 1] lookup below works exactly as it does for the CSV.
# firstnames <- data.frame(
#   name = c("Elizabeth", "Mary", "Jeff", "John", "Morgan", "Helen", "Tim",
#            "Diane", "Patricia"),
#   stringsAsFactors = FALSE
# )

# The first column of the input holds the names to look up.
values <- as.vector(firstnames[, 1])

# Create gender search function
#
# n: a single first name.
# Returns the cbind() of the name and the `gender` column of the gender()
# lookup (method "ssa", years 1900-1990). When the lookup yields no gender
# value the result holds only the name; it is padded with NA further below.
# NOTE(review): presumably gender() returns zero rows for unknown names -
# confirm against the gender package documentation.
workerFunc <- function(n) {
  cbind(n, gender(n, method = "ssa", years = c(1900, 1990))$gender)
}

# Start process and track processing time. The timestamps are printed
# explicitly so they also show up when the script is run via source().
# The cluster is stopped in `finally`, so the worker processes are shut down
# even if one of the lookups fails.
start_time <- Sys.time()
print(start_time)
res <- tryCatch(
  parLapply(cl, values, workerFunc),
  finally = stopCluster(cl)
)
end_time <- Sys.time()
print(end_time)
print(end_time - start_time)

# Put final results together in data frame.
# Every per-name result is padded with NA to a common length so the pieces can
# be row-bound. The length is at least 2 so that the "name"/"gender" columns
# exist even when no name received a gender.
indx <- vapply(res, length, integer(1))
results <- as.data.frame(
  do.call(rbind, lapply(res, `length<-`, max(indx, 2L)))
)
colnames(results) <- c("name", "gender")

# Optional write results to CSV - set path
write.csv(results, "D:/Dropbox/Data Mining/R/Top 1000 names with gender.csv")