Reputation: 2067
I have a list of lists which I am trying to "merge" together.
The dimensions are:
> dim(lst$`63908`$information)
[1] 1 19
> dim(lst$`63908`$filings)
[1] 27 11
> dim(lst$`793952`$information)
[1] 1 19
> dim(lst$`793952`$filings)
[1] 27 11
> dim(lst$`894405`$information)
[1] 1 19
> dim(lst$`894405`$filings)
[1] 27 11
I am trying to join `information` and `filings` for each company (or number) in the list. I want to map or join the `information` list to the `filings` list so that I will have 1 list for each company, with the `information` list repeated 27 times, which is the length (number of rows) of the `filings` list. How can I proceed with this?
New Data:
lst2 <- list(`43410` = list(information = structure(list(name = "GREAT NORTHERN IRON ORE PROPERTIES",
cik = "0000043410", fiscal_year_end = "1231", company_href = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0000043410&owner=exclude&count=100",
sic = "6795", sic_description = "MINERAL ROYALTY TRADERS",
state_location = "MN", state_incorporation = "MN", mailing_city = "ST PAUL",
mailing_state = "MN", mailing_zip = "55101-1361", mailing_street = "W 1290 FIRST NATIONAL BANK BLDG",
mailing_street2 = "332 MINNESOTA STREET", business_city = "SAINT PAUL",
business_state = "MN", business_zip = "55101-1361", business_street = "W 1290 FIRST NATIONAL BANK BLDG",
business_street2 = "332 MINNESOTA ST", business_phone = "6122242385"), row.names = c(NA,
-1L), class = "data.frame"), filings = structure(list(accession_number = c("0000897101-15-000249",
"0000897101-14-000198", "0000897101-13-000217", "0000897101-12-000229",
"0000897101-11-000241", "0000897101-10-000259", "0000897101-09-000357",
"0000897101-08-000407", "0000897101-07-000419", "0000897101-06-000431",
"0000897101-05-000553", "0000897101-04-000395", "0000897101-03-000189",
"0000897101-02-000168", "0000897101-01-000229", "0000897101-00-000219",
"0000897101-99-000213", "0000897101-98-000278", "0000897101-97-000272",
"0000897101-96-000096"), act = c("34", "34", "34", "34", "34",
"34", "34", "34", "34", "34", "34", NA, NA, NA, NA, NA, NA, NA,
NA, NA), file_number = c("001-00701", "001-00701", "001-00701",
"001-00701", "001-00701", "001-00701", "001-00701", "001-00701",
"001-00701", "001-00701", "001-00701", "001-00701", "001-00701",
"001-00701", "001-00701", "001-00701", "001-00701", "001-00701",
"001-00701", "001-00701"), filing_date = structure(c(1424905200,
1392850800, 1361487600, 1329433200, 1297983600, 1266447600, 1235602800,
1204066800, 1172185200, 1140735600, 1109286000, 1077750000, 1047596400,
1016146800, 984092400, 952642800, 921193200, 889743600, 858294000,
826239600), class = c("POSIXct", "POSIXt"), tzone = ""), accepted_date = structure(c(1424905200,
1392850800, 1361487600, 1329433200, 1297983600, 1266447600, 1235602800,
1204066800, 1172185200, 1140735600, 1109286000, 1077750000, 1047596400,
1016146800, 984092400, 952642800, 921193200, 889743600, 858294000,
826239600), class = c("POSIXct", "POSIXt"), tzone = ""), href = c("https://www.sec.gov/Archives/edgar/data/43410/000089710115000249/0000897101-15-000249-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710114000198/0000897101-14-000198-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710113000217/0000897101-13-000217-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710112000229/0000897101-12-000229-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710111000241/0000897101-11-000241-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710110000259/0000897101-10-000259-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710109000357/0000897101-09-000357-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710108000407/0000897101-08-000407-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710107000419/0000897101-07-000419-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710106000431/0000897101-06-000431-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710105000553/0000897101-05-000553-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710104000395/0000897101-04-000395-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710103000189/0000897101-03-000189-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710102000168/0000897101-02-000168-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/000089710101000229/0000897101-01-000229-index.htm",
"https://www.sec.gov/Archives/edgar/data/43410/0000897101-00-000219-index.html",
"https://www.sec.gov/Archives/edgar/data/43410/0000897101-99-000213-index.html",
"https://www.sec.gov/Archives/edgar/data/43410/0000897101-98-000278-index.html",
"https://www.sec.gov/Archives/edgar/data/43410/0000897101-97-000272-index.html",
"https://www.sec.gov/Archives/edgar/data/43410/0000897101-96-000096-index.html"
), type = c("10-K", "10-K", "10-K", "10-K", "10-K", "10-K", "10-K",
"10-K", "10-K", "10-K", "10-K", "10-K", "10-K", "10-K405", "10-K405",
"10-K405", "10-K405", "10-K405", "10-K405", "10-K405"), film_number = c("15651558",
"14628861", "13633190", "12621778", "11623572", "10615446", "09636574",
"08645305", "07644168", "06641277", "05639391", "04629089", "03603149",
"02576043", "1564399", "565431", "99563708", "98564913", "97556531",
"96532494"), form_name = c("Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]", "Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]", "Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]", "Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]", "Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]", "Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]", "Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Sections 13 and 15(d), S-K Item 405]", "Annual report [Sections 13 and 15(d), S-K Item 405]",
"Annual report [Sections 13 and 15(d), S-K Item 405]", "Annual report [Sections 13 and 15(d), S-K Item 405]",
"Annual report [Sections 13 and 15(d), S-K Item 405]", "Annual report [Sections 13 and 15(d), S-K Item 405]",
"Annual report [Sections 13 and 15(d), S-K Item 405]"), description = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
size = c("5 MB", "5 MB", "3 MB", "1 MB", "1 MB", "1 MB",
"1 MB", "1 MB", "1 MB", "514 KB", "493 KB", "525 KB", "168 KB",
"147 KB", "142 KB", "135 KB", "137 KB", "132 KB", "128 KB",
"132 KB")), row.names = c(NA, -20L), class = "data.frame")),
`854099` = list(information = structure(list(name = "SURGICAL LASER TECHNOLOGIES INC /DE/",
cik = "0000854099", fiscal_year_end = "0103", company_href = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0000854099&owner=exclude&count=100",
sic = "3845", sic_description = "ELECTROMEDICAL & ELECTROTHERAPEUTIC APPARATUS",
state_location = "PA", state_incorporation = "DE", mailing_city = "MONTGOMERYVILLE",
mailing_state = "PA", mailing_zip = "18936", mailing_street = "147 KEYSTONE DRIVE",
mailing_street2 = NA_character_, business_city = "MONTGOMERYVILLE",
business_state = "PA", business_zip = "18936", business_street = "147 KEYSTONE DRIVE",
business_street2 = NA_character_, business_phone = "6106500700"), row.names = c(NA,
-1L), class = "data.frame"), filings = structure(list(accession_number = c("0000950154-02-000122",
"0000950154-01-500166", "0000950115-00-000365", "0000950115-99-000493",
"0000950115-98-000504", "0000950115-97-000529", "0000950115-97-000492",
"0000950115-96-001251", "0000950115-96-000320"), act = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), file_number = c("000-17919", "000-17919", "000-17919",
"000-17919", "000-17919", "000-17919", "000-17919", "000-17919",
"000-17919"), filing_date = structure(c(1017356400, 985903200,
953766000, 922917600, 890780400, 860364000, 859759200, 841183200,
828309600), class = c("POSIXct", "POSIXt"), tzone = ""),
accepted_date = structure(c(1017356400, 985903200, 953766000,
922917600, 890780400, 860364000, 859759200, 841183200,
828309600), class = c("POSIXct", "POSIXt"), tzone = ""),
href = c("https://www.sec.gov/Archives/edgar/data/854099/000095015402000122/0000950154-02-000122-index.htm",
"https://www.sec.gov/Archives/edgar/data/854099/000095015401500166/0000950154-01-500166-index.htm",
"https://www.sec.gov/Archives/edgar/data/854099/0000950115-00-000365-index.html",
"https://www.sec.gov/Archives/edgar/data/854099/0000950115-99-000493-index.html",
"https://www.sec.gov/Archives/edgar/data/854099/0000950115-98-000504-index.html",
"https://www.sec.gov/Archives/edgar/data/854099/0000950115-97-000529-index.html",
"https://www.sec.gov/Archives/edgar/data/854099/0000950115-97-000492-index.html",
"https://www.sec.gov/Archives/edgar/data/854099/0000950115-96-001251-index.html",
"https://www.sec.gov/Archives/edgar/data/854099/0000950115-96-000320-index.html"
), type = c("10-K", "10-K", "10-K405", "10-K", "10-K405",
"10-K405/A", "10-K405", "10-K/A", "10-K"), film_number = c("02593792",
"1587034", "576973", "99586150", "98573043", "97576026",
"97571667", "96622343", "96542736"), form_name = c("Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Sections 13 and 15(d), S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Sections 13 and 15(d), S-K Item 405]",
"Annual report [Sections 13 and 15(d), S-K Item 405]",
"Annual report [Sections 13 and 15(d), S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]"
), description = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_), size = c("194 KB", "209 KB",
"224 KB", "235 KB", "261 KB", "185 KB", "187 KB", "234 KB",
"206 KB")), row.names = c(NA, -9L), class = "data.frame")),
`1024125` = list(information = structure(list(name = "CRAGAR INDUSTRIES INC /DE",
cik = "0001024125", fiscal_year_end = "1231", company_href = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0001024125&owner=exclude&count=100",
sic = "3714", sic_description = "MOTOR VEHICLE PARTS & ACCESSORIES",
state_location = "AZ", state_incorporation = "DE", mailing_city = "PHOENIX",
mailing_state = "AZ", mailing_zip = "85031", mailing_street = "4636 N. 43RD AVE",
mailing_street2 = NA_character_, business_city = "PHOENIX",
business_state = "AZ", business_zip = "85031", business_street = "4336 N. 43RD AVE",
business_street2 = NA_character_, business_phone = "6022471300"), row.names = c(NA,
-1L), class = "data.frame"), filings = structure(list(), .Names = character(0), class = "data.frame", row.names = integer(0))),
`712515` = list(information = structure(list(name = "ELECTRONIC ARTS INC.",
cik = "0000712515", fiscal_year_end = "0331", company_href = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0000712515&owner=exclude&count=100",
sic = "7372", sic_description = "SERVICES-PREPACKAGED SOFTWARE",
state_location = "CA", state_incorporation = "DE", mailing_city = "REDWOOD CITY",
mailing_state = "CA", mailing_zip = "94065", mailing_street = "209 REDWOOD SHORES PARKWAY",
mailing_street2 = NA_character_, business_city = "REDWOOD CITY",
business_state = "CA", business_zip = "94065", business_street = "209 REDWOOD SHORES PARKWAY",
business_street2 = NA_character_, business_phone = "650-628-1500"), row.names = c(NA,
-1L), class = "data.frame"), filings = structure(list(accession_number = c("0000712515-18-000024",
"0000712515-17-000035", "0000712515-16-000111", "0000712515-15-000033",
"0000712515-14-000049", "0000712515-14-000024", "0000712515-13-000022",
"0001193125-12-249324", "0001193125-11-149262", "0000950130-10-001579",
"0001193125-09-116895", "0000891618-08-000290", "0000950134-07-012528",
"0000950134-06-011401", "0000891618-05-000406", "0000891618-04-001046",
"0000891618-03-002939", "0001012870-02-002877", "0000950005-01-500255",
"0000950005-00-000876", "0000950005-00-000777", "0000950005-99-000599",
"0000950005-98-000571", "0001012870-97-001195", "0000912057-96-013563",
"0000912057-95-004984"), act = c("34", "34", "34", "34",
"34", "34", "34", "34", "34", "34", "34", "34", "34", "34",
"34", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), file_number = c("000-17948",
"000-17948", "000-17948", "000-17948", "000-17948", "000-17948",
"000-17948", "000-17948", "000-17948", "000-17948", "000-17948",
"000-17948", "000-17948", "000-17948", "000-17948", "000-17948",
"000-17948", "000-17948", "000-17948", "000-17948", "000-17948",
"000-17948", "000-17948", "000-17948", "000-17948", "000-17948"
), filing_date = structure(c(1527026400, 1495576800, 1464300000,
1432159200, 1412287200, 1400623200, 1369173600, 1337896800,
1306188000, 1274997600, 1242943200, 1211493600, 1180476000,
1150063200, 1118095200, 1086300000, 1055196000, 1025215200,
993765600, 965944800, 962229600, 930607200, 898812000, 867016800,
836172000, 804376800), class = c("POSIXct", "POSIXt"), tzone = ""),
accepted_date = structure(c(1527026400, 1495576800, 1464213600,
1432159200, 1412287200, 1400623200, 1369173600, 1337896800,
1306188000, 1274997600, 1242856800, 1211493600, 1180389600,
1149804000, 1118095200, 1086300000, 1055196000, 1025215200,
993765600, 965944800, 962229600, 930607200, 898812000,
867016800, 836172000, 804376800), class = c("POSIXct",
"POSIXt"), tzone = ""), href = c("https://www.sec.gov/Archives/edgar/data/712515/000071251518000024/0000712515-18-000024-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000071251517000035/0000712515-17-000035-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000071251516000111/0000712515-16-000111-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000071251515000033/0000712515-15-000033-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000071251514000049/0000712515-14-000049-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000071251514000024/0000712515-14-000024-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000071251513000022/0000712515-13-000022-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000119312512249324/0001193125-12-249324-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000119312511149262/0001193125-11-149262-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000095013010001579/0000950130-10-001579-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000119312509116895/0001193125-09-116895-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000089161808000290/0000891618-08-000290-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000095013407012528/0000950134-07-012528-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000095013406011401/0000950134-06-011401-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000089161805000406/0000891618-05-000406-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000089161804001046/0000891618-04-001046-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000089161803002939/0000891618-03-002939-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000101287002002877/0001012870-02-002877-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000095000501500255/0000950005-01-500255-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000095000500000876/0000950005-00-000876-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/000095000500000777/0000950005-00-000777-index.htm",
"https://www.sec.gov/Archives/edgar/data/712515/0000950005-99-000599-index.html",
"https://www.sec.gov/Archives/edgar/data/712515/0000950005-98-000571-index.html",
"https://www.sec.gov/Archives/edgar/data/712515/0001012870-97-001195-index.html",
"https://www.sec.gov/Archives/edgar/data/712515/0000912057-96-013563-index.html",
"https://www.sec.gov/Archives/edgar/data/712515/0000912057-95-004984-index.html"
), type = c("10-K", "10-K", "10-K", "10-K", "10-K/A",
"10-K", "10-K", "10-K", "10-K", "10-K", "10-K", "10-K",
"10-K", "10-K", "10-K", "10-K", "10-K", "10-K", "10-K",
"10-K/A", "10-K", "10-K", "10-K", "10-K", "10-K", "10-K405"
), film_number = c("18855472", "17867170", "161679738",
"15883321", "141140195", "14861211", "13865105", "12871782",
"11868579", "10867459", "09846699", "08859043", "07885284",
"06898280", "05882947", "04850503", "03739506", "02691823",
"1671648", "694392", "663928", "99655069", "98655515",
"97627835", "96589370", "95550463"), form_name = c("Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Section 13 and 15(d), not S-K Item 405]",
"Annual report [Sections 13 and 15(d), S-K Item 405]"
), description = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), size = c("15 MB",
"14 MB", "15 MB", "21 MB", "934 KB", "26 MB", "21 MB",
"14 MB", "17 MB", "5 MB", "2 MB", "1 MB", "1 MB", "1 MB",
"1 MB", "1 MB", "1 MB", "900 KB", "1 MB", "7 KB", "412 KB",
"716 KB", "226 KB", "568 KB", "197 KB", "431 KB")), row.names = c(NA,
-26L), class = "data.frame")))
Upvotes: 2
Views: 61
Reputation: 107687
Consider a base R solution that extracts the needed sublists and `cbind`s the items together with `Map`, followed by a final `do.call(rbind, ...)` outside the loop to produce a single data frame.
Because some of the `filings` data frames are empty, the handler function needs an extra branch that adds the missing columns (filled with `NA`) so that the final `rbind` succeeds. The code below also creates an `ID` column from the names of one of the sublists:
# Split lst2 into two parallel lists, one holding each company's
# "information" element and one holding its "filings" element. Both keep
# the names of lst2.
info_list <- lapply(lst2, function(company) company[["information"]])
filings_list <- lapply(lst2, function(company) company[["filings"]])
# Combine one company's information row with its filings.
#
# Arguments:
#   i        - data frame with the company's information.
#   f        - data frame with the company's filings; may be NULL or empty.
#   n        - identifier stored in the ID column.
#   template - optional filings data frame whose columns are used as NA
#              placeholders when `f` is empty. When NULL, the first element
#              of a `filings_list` visible from this function that actually
#              has columns is used instead.
#
# Returns a data frame with ID, the information columns and the filings
# columns. When `f` is NULL or has zero rows the filings columns are NA;
# if no template with columns can be found, only ID and the information
# columns are returned.
proc_merge <- function(i, f, n, template = NULL) {
  if (!is.null(f) && nrow(f) > 0) {
    return(cbind(ID = n, i, f))
  }

  # Do not assume the first company has filings: pick the first filings
  # data frame that actually carries columns.
  if (is.null(template) && exists("filings_list")) {
    with_cols <- Filter(
      function(x) is.data.frame(x) && ncol(x) > 0,
      filings_list
    )
    if (length(with_cols) > 0) {
      template <- with_cols[[1]]
    }
  }

  if (is.null(template) || ncol(template) == 0) {
    return(cbind(ID = n, i))
  }

  # Indexing with NA yields a single all-NA row that keeps each column's
  # class (e.g. POSIXct), so a later rbind() does not depend on row order.
  na_row <- template[NA_integer_, , drop = FALSE]
  row.names(na_row) <- NULL
  cbind(ID = n, i, na_row)
}
# Run proc_merge() once per company, passing the list names as the ID value.
df_list <- mapply(
  proc_merge, info_list, filings_list, names(info_list),
  SIMPLIFY = FALSE
)
# Stack the per-company data frames into one and reset the row names.
final_df <- do.call("rbind", df_list)
rownames(final_df) <- NULL
Upvotes: 1
Reputation: 887501
If there are cases of `NULL` elements, one approach would be to `pluck` the 'information' and 'filings' elements separately, convert each to a single data.frame with an `id` column, and then do a `full_join` (assuming that the 'information' element has either 1 or 0 rows).
library(tidyverse)
# Row-bind every company's 'information' (and, separately, 'filings') into
# one data frame tagged with an `id` column taken from the list names, then
# full-join the two on that id.
lst2 %>%
  map_dfr(~ pluck(.x, "information"), .id = "id") %>%
  full_join(
    map_dfr(lst2, ~ pluck(.x, "filings"), .id = "id"),
    by = "id"
  )
Upvotes: 1
Reputation: 3175
Here's an answer to handle lists of different lengths:
library(tidyr)
library(purrr)
library(dplyr)
# Drop the company whose filings data frame is empty.
lst2[["1024125"]] <- NULL
# For each remaining company, column-bind its elements into one data frame
# and fill every column.
lst3 <- map(lst2, function(company) {
  merged <- cbind.data.frame(company)
  fill(merged, everything())
})
Take notice that I am discarding element `1024125` from the list `lst2`. That list element contains an empty data frame with no columns whatsoever. I think you should tidy your data first and make sure that all those data frames have at least the same columns, i.e. all `information` data frames have the same columns, and all `filings` data frames have the same columns.
Upvotes: 1
Reputation: 3175
library(purrr)
# Repeat every field of each company's 'information' once per row of that
# company's 'filings', so the count follows the data instead of being
# fixed at 27.
foo <- map(lst, function(x) {
  n_filings <- NROW(x[["filings"]])
  map_at(x, "information", function(y) {
    map(y, rep, n_filings)
  })
})
# Remove one level of nesting from each company's list.
bar <- foo %>% map(flatten)
# if you want info about each company as a dataframe
bar2 <- bar %>% map(as.data.frame)
Upvotes: 1