R - Calculating the predominant (most frequent) value for character fields
I'm trying to loop through the columns whose type is character and return one data frame that contains the predominant (most frequent) value of each character column, grouped by an ID field.
Is there a way to replicate the following code in some kind of loop?
# Non-looped approach from the question: for each character column, count how
# often every value occurs per group_id, keep the most frequent value, and
# merge the per-column results on group_id.
# Requires dplyr (select/group_by/summarise/arrange/desc/n) and data.table to
# be attached.

# Keep only the character columns.
# NOTE(review): df_characters must already exist at this point (presumably a
# data.frame copy of dfr), and group_id is removed by this filter unless it is
# itself a character column -- confirm against the full script.
df_characters <- df_characters[, vapply(dfr, is.character, logical(1))]

## predominance column_1 ##
predom <- select(df_characters, group_id, column_1)
predom <- group_by(predom, group_id, column_1)
predom <- summarise(predom, countpredom = n())
predom <- arrange(predom, group_id, desc(countpredom))
predom <- data.table(predom, key = "group_id")
# .SD (upper case) is data.table's per-group subset; head(.SD, 1) keeps the
# first row of every group, i.e. the value with the highest count.
predominant_column_1 <- predom[, head(.SD, 1), by = group_id]

## predominance column_2 ##
predom <- select(df_characters, group_id, column_2)
predom <- group_by(predom, group_id, column_2)
predom <- summarise(predom, countpredom = n())
predom <- arrange(predom, group_id, desc(countpredom))
predom <- data.table(predom, key = "group_id")
predominant_column_2 <- predom[, head(.SD, 1), by = group_id]

## merge into the final table ##
merged <- merge(predominant_column_1, predominant_column_2, by = "group_id")
To clarify the question, I have also added a dummy table: df_character_table
The result should look like the result table.
So for group 1, "petre" is the predominant name in column 1 and "car" is the predominant mode of travel in column 2. The predominant values of column 1 and column 2 should be calculated independently of each other.
thank you
This is not the best solution, but it works.
########## Predominant (most frequent) value per group ##########
# For every character column of `dfr`, find the value that occurs most often
# within each `groupby_id` and collect the results in `fin_table`: one row per
# group, the group size `n`, and one column per character field.
# Requires dplyr to be attached; `dfr` must contain a `groupby_id` column.

df_characters <- as.data.frame(dfr)

# Names of the character fields. The grouping key is excluded so it is never
# selected or merged twice when it happens to be a character column itself.
characterstomerge <- setdiff(
  names(df_characters)[vapply(df_characters, is.character, logical(1))],
  "groupby_id"
)

# Grouping key followed by the character fields.
character_field_list <- c("groupby_id", characterstomerge)
# drop = FALSE keeps a data frame even when only one column is selected.
df_characters <- df_characters[, character_field_list, drop = FALSE]

# Columns to loop through (character fields only, no key).
df_fieldstomerge <- df_characters[, characterstomerge, drop = FALSE]

# Base table: number of observations per group.
fin_table <- df_characters %>%
  count(groupby_id, sort = TRUE)

# Loop over the character fields and merge each field's predominant value
# into the base table.
for (i in names(df_fieldstomerge)) {
  temp_table <- df_characters %>%
    # Count every (group, value) pair, most frequent first; .data[[i]] looks
    # the column up by the name held in `i`.
    count(groupby_id, .data[[i]], sort = TRUE) %>%
    group_by(groupby_id) %>%
    # Keep only the top row per group; ties are broken by row order.
    slice(1) %>%
    ungroup()

  # Drop the count column, keeping the key and the predominant value.
  temp_table <- temp_table[, c("groupby_id", i)]

  fin_table <- merge(fin_table, temp_table, by = "groupby_id")
}
Comments
Post a Comment