Loop over strings in R
I'd like to know what is wrong with my code rather than just get a solution. I wish to loop over strings in data that looks as follows:
id  source  transaction
1   a > b   6 > 0
2   j > k   5
3   b > c   4 > 0
I have a list, and I wish to go over the list, find the rows that contain each element, and compute the average.
mylist <- c("a", "b")
So the desired output has one row for each element in the list:
source  avg
a       6
b       2
I do not know how to loop over the list and send the results to a CSV file. I tried this:
mylist <- c( "a", "b" )
for(i in mylist) {
  keepdata <- df [grepl(i, df$source), ]
  keepdata <- cSplit(keepdata, "transaction", ">", "long")
  avg<- mean(keepdata$transactions)
  result <- list(i,avg )
  write.table(result ,file="c:/users.csv", append=TRUE,sep=",",col.names=FALSE,row.names=FALSE)
}
But it gives me an "NA" result and the following warning:
Warning messages:
1: In mean.default(keepdata$transactions) :
  argument is not numeric or logical: returning NA
2: In mean.default(keepdata$transactions) :
  argument is not numeric or logical: returning NA
We can use cSplit to split the 'source' column and convert the dataset to 'long' format, then specify the 'i' condition and, grouped by 'source', get the mean of 'transaction' (using data.table methods):
library(splitstackshape)
cSplit(df1, "source", " > ", "long")[source %in% mylist, .(avg = mean(transaction)), source]
#   source avg
#1:      a   6
#2:      b   5
Or another option is separate_rows from tidyr to convert to 'long' format, and then use dplyr methods to summarise after grouping by 'source':
library(tidyr)
library(dplyr)
separate_rows(df1, source) %>%
  filter(source %in% mylist) %>%
  group_by(source) %>%
  summarise(avg = mean(transaction))
Update
For the new dataset ('df2'), we need to split both columns into 'long' format, and then get the mean of 'transaction' grouped by 'source':
cSplit(df2, 2:3, " > ", "long")[source %in% my_list, .(avg = mean(transaction)), source]
#   source avg
#1:      a   6
#2:      b   2
The for loop can be modified to:
for(i in mylist) {
  keepdata <- cSplit(df2, 2:3, ">", "long")
  keepdata <- keepdata[grepl(i, source)]
  avg<- mean(keepdata$transaction)
  result <- list(i,avg )
  print(result)
  write.table(result ,file="c:/users.csv", append=TRUE,sep=",",col.names=FALSE,row.names=FALSE)
}
#[[1]]
#[1] "a"
#[[2]]
#[1] 6
#[[1]]
#[1] "b"
#[[2]]
#[1] 2
Data
df1 <- structure(list(id = 1:3, source = c("a > b", "j > k", "b > c"
), transaction = c(6L, 5L, 4L)), .names = c("id", "source", "transaction"
), class = "data.frame", row.names = c(NA, -3L))
df2 <- structure(list(id = 1:3, source = c("a > b", "j > k", "b > c"
), transaction = c("6 > 0", "5", "4 > 0")), .names = c("id", "source",
"transaction"), class = "data.frame", row.names = c(NA, -3L))
Comments
Post a Comment