SDahm
SDahm

Reputation: 436

In R: shorten list from behind till a certain value

Suppose i have many and larger lists than in this example.

ID <- c(2,2,2,2,5,5,5,5,9,9,9,9)
Rounds <- c(0,1,2,3,0,1,2,3,0,1,2,3)
solution <- c(as.character('[[-1,1,1,-1],[1,-1,1,-1]]'),as.character('[[-1,1,-1,-1],[1,-1,1,-1]]'),as.character('[[-1,1,1,-1],[1,-1,-1,-1]]') )
solution_resp <- c(as.character('[[-1,1,1,1],[1,1,-1,1]]'),as.character('[[1,-1,1,-1],[1,-1,1,1]]'),as.character('[[-1,1,1,1],[1,-1,1,-1]]'))

dt <- data.frame(ID,Rounds,solution,solution_resp)

dt$solution <- gsub('],[',',', dt$solution, fixed=TRUE)
dt$solution <- gsub('[[',''  , dt$solution, fixed=TRUE)
dt$solution <- gsub(']]',''  , dt$solution, fixed=TRUE)

dt$solution <- as.list(strsplit(dt$solution, ","))

dt$solution_resp <- gsub('],[',',', dt$solution_resp, fixed=TRUE)
dt$solution_resp <- gsub('[[',''  , dt$solution_resp, fixed=TRUE)
dt$solution_resp <- gsub(']]',''  , dt$solution_resp, fixed=TRUE)

dt$solution_resp <- as.list(strsplit(dt$solution_resp, ","))

Looks like this:

   ID Rounds                    solution             solution_resp
1   2      0  -1, 1, 1, -1, 1, -1, 1, -1  -1, 1, 1, 1, 1, 1, -1, 1
2   2      1 -1, 1, -1, -1, 1, -1, 1, -1 1, -1, 1, -1, 1, -1, 1, 1
3   2      2 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
4   2      3  -1, 1, 1, -1, 1, -1, 1, -1  -1, 1, 1, 1, 1, 1, -1, 1
5   5      0 -1, 1, -1, -1, 1, -1, 1, -1 1, -1, 1, -1, 1, -1, 1, 1
6   5      1 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
7   5      2  -1, 1, 1, -1, 1, -1, 1, -1  -1, 1, 1, 1, 1, 1, -1, 1
8   5      3 -1, 1, -1, -1, 1, -1, 1, -1 1, -1, 1, -1, 1, -1, 1, 1
9   9      0 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
10  9      1  -1, 1, 1, -1, 1, -1, 1, -1  -1, 1, 1, 1, 1, 1, -1, 1
11  9      2 -1, 1, -1, -1, 1, -1, 1, -1 1, -1, 1, -1, 1, -1, 1, 1
12  9      3 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1

In the lists dt$solution_resp I would like to drop all 1 from behind until the first -1 appears (counting from right to left).

In the example this would mean shortening the lists by taking off the last element in first row (and every third row). In the second row (and every third) the list shall be shorted by two elements. The third row remains as it is because there is a -1 at the last position already.

Then, I want the dt$solution lists to have the same length.

Solution:

   ID Rounds                    solution             solution_resp
1   2      0  -1, 1, 1, -1, 1, -1, 1     -1, 1, 1, 1, 1, 1, -1
2   2      1 -1, 1, -1, -1, 1, -1         1, -1, 1, -1, 1, -1
3   2      2 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
4   2      3  -1, 1, 1, -1, 1, -1, 1     -1, 1, 1, 1, 1, 1, -1
5   5      0 -1, 1, -1, -1, 1, -1         1, -1, 1, -1, 1, -1
6   5      1 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
7   5      2  -1, 1, 1, -1, 1, -1, 1     -1, 1, 1, 1, 1, 1, -1
8   5      3 -1, 1, -1, -1, 1, -1,        1, -1, 1, -1, 1, -1
9   9      0 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
10  9      1  -1, 1, 1, -1, 1, -1, 1,    -1, 1, 1, 1, 1, 1, -1
11  9      2 -1, 1, -1, -1, 1, -1,        1, -1, 1, -1, 1, -1
12  9      3 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1

Upvotes: 2

Views: 167

Answers (3)

thelatemail
thelatemail

Reputation: 93813

Find the Position of the last -1 starting at the right= and only keep up until this point:

lapply(dt$solution_resp, \(x) head(x, Position(identity, x=="-1", right=TRUE)) )

In the circumstance where there could be rows without a -1 at all:

lapply(dt$solution_resp, \(x) 
  head(x, pmin(Position(identity, x=="-1", right=TRUE), Inf, na.rm=TRUE)))

#[[1]]
#[1] "-1" "1"  "1"  "1"  "1"  "1"  "-1"
#
#[[2]]
#[1] "1"  "-1" "1"  "-1" "1"  "-1"
#
#[[3]]
#[1] "-1" "1"  "1"  "1"  "1"  "-1" "1"  "-1"
#...

Upvotes: 1

Onyambu
Onyambu

Reputation: 79228

Using the original dt <- data.frame(ID,Rounds,solution,solution_resp) Do the following:

dt %>%
      mutate(across(contains('solution'),
            ~str_split(str_remove_all(.x,'\\[|\\]|(, ?1)\\1*\\]+$'), ',')))
  ID Rounds                    solution             solution_resp
1   2      0  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
2   2      1 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
3   2      2 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
4   2      3  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
5   5      0 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
6   5      1 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
7   5      2  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
8   5      3 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
9   9      0 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
10  9      1  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
11  9      2 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
12  9      3 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1

in Base R:

dt[3:4] <- gsub('\\[|\\]|(, ?1)\\1*\\]+$', '', as.matrix(dt[3:4]))
dt[3:4]<-lapply(dt[3:4], strsplit, ',')
dt
   ID Rounds                    solution             solution_resp
1   2      0  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
2   2      1 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
3   2      2 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
4   2      3  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
5   5      0 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
6   5      1 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
7   5      2  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
8   5      3 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
9   9      0 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1
10  9      1  -1, 1, 1, -1, 1, -1, 1, -1     -1, 1, 1, 1, 1, 1, -1
11  9      2 -1, 1, -1, -1, 1, -1, 1, -1       1, -1, 1, -1, 1, -1
12  9      3 -1, 1, 1, -1, 1, -1, -1, -1 -1, 1, 1, 1, 1, -1, 1, -1

Upvotes: 1

wibeasley
wibeasley

Reputation: 5287

Just a tad longer than @thelatemail's one-liner.

dt |> 
  dplyr::select(
    ID,
    Rounds,
    s            = solution_resp,
  ) |> 
  dplyr::mutate(
    s = gsub("\\[|\\]", "", s),               # Remove the brackets
  ) |> 
  tibble::rowid_to_column("row_id") |> 
  tidyr::separate_rows(s, sep = ",") |> 
  dplyr::mutate(
    s = dplyr::recode(s, "-1" = T, "1" = F)   # Make the math easier later
  ) |> 
  dplyr::group_by(row_id, ID, Rounds) |> 
  dplyr::mutate(
    index       = seq_len(dplyr::n()),        # Remember the ordering
  ) |> 
  dplyr::arrange(dplyr::desc(index)) |>       # Reverse
  dplyr::mutate(
    cumulative = dplyr::cumany(s),            # Find the first (last) 1/F.
    s = dplyr::recode(as.character(s), "TRUE" = "-1", "FALSE" = "1")
  ) |> 
  dplyr::filter(cumulative) |>                # Drop everything afterwards
  dplyr::ungroup() |> 
  dplyr::arrange(ID, Rounds, row_id, index) |>  # Reverse again
  dplyr::group_by(row_id, ID, Rounds) |> 
  dplyr::summarize(
    solution_resp = paste(s, collapse = ", "),  # Smush back in a row
    element_count = dplyr::n(),                 # Quick count for inspection
  ) |> 
  dplyr::ungroup()

Output:

# A tibble: 12 x 5
   row_id    ID Rounds solution_resp             element_count
    <int> <dbl>  <dbl> <chr>                             <int>
 1      1     2      0 -1, 1, 1, 1, 1, 1, -1                 7
 2      2     2      1 1, -1, 1, -1, 1, -1                   6
 3      3     2      2 -1, 1, 1, 1, 1, -1, 1, -1             8
 4      4     2      3 -1, 1, 1, 1, 1, 1, -1                 7
 5      5     5      0 1, -1, 1, -1, 1, -1                   6
 6      6     5      1 -1, 1, 1, 1, 1, -1, 1, -1             8
 7      7     5      2 -1, 1, 1, 1, 1, 1, -1                 7
 8      8     5      3 1, -1, 1, -1, 1, -1                   6
 9      9     9      0 -1, 1, 1, 1, 1, -1, 1, -1             8
10     10     9      1 -1, 1, 1, 1, 1, 1, -1                 7
11     11     9      2 1, -1, 1, -1, 1, -1                   6
12     12     9      3 -1, 1, 1, 1, 1, -1, 1, -1             8

(I'm using the initial dt <- data.frame(ID,Rounds,solution,solution_resp) definition of dt).

Upvotes: 1

Related Questions