brucezepplin
brucezepplin

Reputation: 9752

lapply is converting strings to factors

I have a list element, t1[3], that holds a data frame whose cells contain strings and numbers (its dput output is shown below). Running

as.data.frame(lapply(t1[3], function(y) gsub("\\s+", " ", y)),stringsAsFactors = FALSE)

produces a data frame of numbers:

1                 c(84, 85, 1, 2, 3, 4, 5, 85, 6, 7, 8, 9, 85, 10, 11, 12, 13, 85, 14, 15, 16, 17, 85, 18, 85, 19, 20, 21, 22, 23, 24, 85, 25, 26, 27, 28, 29, 30, 31, 32, 33, 85, 34, 35, 85, 36, 37, 38, 39, 40, 85, 41, 42, 43, 44, 45, 85, 46, 47, 85, 48, 49, 85, 50, 51, 85, 52, 53, 54, 85, 55, 56, 85, 57, 58, 59, 85, 60, 61, 62, 85, 63, 85, 64, 85, 65, 85, 66, 85, 67, 85, 68, 85, 69, 85, 70, 85, 71, 85, 72, 85, 73, 85, 74, 75, 76, 85, 77, 85, 78, 79, 80, 81, 82, 83)
2 c(66, 114, 64, 21, 2, 102, 115, 52, 46, 47, 22, 53, 38, 80, 20, 39, 40, 62, 105, 28, 106, 63, 6, 7, 103, 67, 108, 71, 113, 112, 104, 10, 33, 15, 92, 97, 107, 55, 56, 41, 11, 36, 60, 37, 90, 70, 54, 8, 61, 91, 85, 3, 87, 75, 57, 86, 29, 69, 30, 59, 77, 74, 65, 99, 58, 93, 9, 19, 94, 34, 25, 35, 12, 68, 111, 13, 72, 98, 14, 73, 42, 43, 109, 110, 23, 24, 88, 89, 78, 79, 48, 49, 50, 51, 81, 82, 95, 96, 44, 45, 31, 32, 83, 16, 1, 84, 4, 5, 100, 17, 18, 101, 76, 27, 26)
3                                                                                                                           c(2, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1)
4                                                                                                                           c(1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)

I am only replacing runs of multiple whitespace characters in the data frame with a single space, so why am I getting numbers back? I expected to get strings.

dput output of t1[3]:

dput(t1[3])
structure(list(`NULL` = structure(list(V1 = structure(c(84L, 
85L, 1L, 2L, 3L, 4L, 5L, 85L, 6L, 7L, 8L, 9L, 85L, 10L, 11L, 
12L, 13L, 85L, 14L, 15L, 16L, 17L, 85L, 18L, 85L, 19L, 20L, 21L, 
22L, 23L, 24L, 85L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 
85L, 34L, 35L, 85L, 36L, 37L, 38L, 39L, 40L, 85L, 41L, 42L, 43L, 
44L, 45L, 85L, 46L, 47L, 85L, 48L, 49L, 85L, 50L, 51L, 85L, 52L, 
53L, 54L, 85L, 55L, 56L, 85L, 57L, 58L, 59L, 85L, 60L, 61L, 62L, 
85L, 63L, 85L, 64L, 85L, 65L, 85L, 66L, 85L, 67L, 85L, 68L, 85L, 
69L, 85L, 70L, 85L, 71L, 85L, 72L, 85L, 73L, 85L, 74L, 75L, 76L, 
85L, 77L, 85L, 78L, 79L, 80L, 81L, 82L, 83L), .Label = c("01A", 
"01B", "01C", "01D", "01Z", "02A", "02B", "02C", "02Z", "03A", 
"03B", "03C", "03Z", "04A", "04B", "04C", "04Z", "05Z", "06A", 
"06B", "06C", "06D", "06E", "06Z", "07A", "07B", "07C", "07D", 
"07E", "07F", "07G", "07H", "07Z", "08A", "08Z", "09A", "09B", 
"09C", "09D", "09Z", "10A", "10B", "10C", "10D", "10Z", "11A", 
"11Z", "12A", "12B", "13A", "13Z", "14A", "14B", "14Z", "15A", 
"15Z", "16A", "16B", "16Z", "17A", "17B", "17Z", "18Z", "19Z", 
"20Z", "21Z", "22Z", "23Z", "24Z", "25Z", "26Z", "27Z", "28Z", 
"29A", "29B", "29Z", "30Z", "31A", "31B", "31Z", "97Z", "98Z", 
"99Z", "Value", " "), class = "factor"), V2 = structure(c(66L, 
114L, 64L, 21L, 2L, 102L, 115L, 52L, 46L, 47L, 22L, 53L, 38L, 
80L, 20L, 39L, 40L, 62L, 105L, 28L, 106L, 63L, 6L, 7L, 103L, 
67L, 108L, 71L, 113L, 112L, 104L, 10L, 33L, 15L, 92L, 97L, 107L, 
55L, 56L, 41L, 11L, 36L, 60L, 37L, 90L, 70L, 54L, 8L, 61L, 91L, 
85L, 3L, 87L, 75L, 57L, 86L, 29L, 69L, 30L, 59L, 77L, 74L, 65L, 
99L, 58L, 93L, 9L, 19L, 94L, 34L, 25L, 35L, 12L, 68L, 111L, 13L, 
72L, 98L, 14L, 73L, 42L, 43L, 109L, 110L, 23L, 24L, 88L, 89L, 
78L, 79L, 48L, 49L, 50L, 51L, 81L, 82L, 95L, 96L, 44L, 45L, 31L, 
32L, 83L, 16L, 1L, 84L, 4L, 5L, 100L, 17L, 18L, 101L, 76L, 27L, 
26L), .Label = c("Abdominal \r\n    Pain", "Abrasion", "Alcohol", 
"Allergy (including \r\n      Anaphylaxis)", "Allergy (including \r\n      Anaphylaxis), other or unspecified", 
"Amputation", "Amputation, other or \r\n      unspecified", "Animal Bite", 
"Asthma", "Burns, Scalds and \r\n      Thermal Conditions", "Burns, Scalds and Thermal \r\n      Conditions, other or unspecified", 
"Cardiovascular \r\n      Conditions", "Cardiovascular Conditions, \r\n      other or unspecified", 
"Cerebrovascular \r\n      Event", "Chemical", "Chest Pain, non \r\n      cardiac", 
"Chronic Alcohol \r\n      Abuse", "Chronic Drug \r\n      Abuse", 
"Chronic Obstructive \r\n      Pulmonary disease", "Closed \r\n    Fracture", 
"Contusion", "Dental \r\nInjury", "Dermatological \r\n      Conditions", 
"Dermatological Conditions, \r\n      other or unspecified", 
"Diabetes", "Diagnosis Not \r\n      Recorded", "Diagnosis Type Not \r\n      Otherwise Specified", 
"Dislocation", "Drowning", "Drowning, other or \r\n      unspecified", 
"Ear, Nose and Throat \r\n      Conditions", "Ear, Nose and Throat \r\n      Conditions, other or unspecified", 
"Electric", "Endocrinological \r\n      Conditions", "Endocrinological \r\n      Conditions, other or unspecified", 
"Foreign \r\n      Body", "Foreign Body, other or \r\n      unspecified", 
"Fracture", "Fracture \r\n      Dislocation", "Fracture, other or \r\n      unspecified", 
"Frostbite", "Gastrointestinal \r\n      Conditions", "Gastrointestinal \r\n      Conditions, other or unspecified", 
"Genito-Urinary \r\n      Medicine", "Genito-urinary Medicine, \r\n      other or unspecified", 
"Glasgow Coma Score \r\n      15", "Glasgow Coma Score \r\n      <15", 
"Gynaecological \r\n      Conditions", "Gynaecological Conditions, \r\n      other or unspecified", 
"Haematological \r\n      Conditions", "Haematological Conditions, \r\n      other or unspecified", 
"Head \r\n      Injury", "Head Injury, other or \r\n      unspecified", 
"Human Bite", "Hyperthermia", "Hypothermia", "Illicit \r\n      Drug", 
"Infection, other or \r\n      unspecified", "Infectious \r\n      Disease", 
"Ingested Foreign \r\n      Body", "Insect Bite or \r\n      Sting", 
"Joint \r\n      Injury", "Joint Injury, other or \r\n      unspecified", 
"Laceration", "Local \r\n      Infection", "Meaning", "Muscle \r\nInjury", 
"Myocardial \r\n      Infarction", "Near \r\nDrowning", "Needle Stick \r\n      Injury", 
"Nerve Injury", "Neurological Conditions", "Neurological Conditions, \r\n      other or unspecified", 
"Non-notifiable \r\n      Disease", "Non-prescribed/purchased \r\n      drug", 
"Nothing Abnormal \r\n      Detected", "Notifiable \r\n      Disease", 
"Obstetric \r\n      Conditions", "Obstetric Conditions, \r\n      other or unspecified", 
"Open \r\nFracture", "Ophthalmic \r\n      Conditions", "Ophthalmic Conditions, \r\n      other or unspecified", 
"Pain", "Pain, other or \r\n      unspecified", "Poisoning or \r\n      Overdose", 
"Poisoning or Overdose, \r\n      other or unspecified", "Prescribed \r\n      Drug", 
"Psychological/Psychiatric \r\n    Conditions", "Psychological/Psychiatric \r\n      Conditions, other or unspecified", 
"Puncture \r\n      Wounds", "Puncture Wounds, other or \r\n      unspecified", 
"Radiation", "Respiratory \r\n      Conditions", "Respiratory Conditions, \r\n      other or unspecified", 
"Rheumatological \r\n      Conditions", "Rheumatological \r\n      Conditions, other or unspecified", 
"Scald", "Seizure/Convulsion", "Septicaemia", "Social \r\n      Problems/Homelessness", 
"Social \r\n      Problems/Homelessness, other or unspecified", 
"Soft tissue \r\n      inflammation", "Soft Tissue \r\n      Injury", 
"Soft Tissue Injury, other \r\n      or unspecified", "Sprain", 
"Subluxation", "Sunburn", "Tendon \r\nInjury", "Urological Conditions", 
"Urological Conditions, \r\n      other or unspecified", "Vascular Condition", 
"Vascular \r\n    Injury", "Visceral \r\n    Injury", "Wound", 
"Wound, other or \r\n      unspecified"), class = "factor"), 
    V3 = structure(c(2L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 
    1L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 3L, 
    1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 
    1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 
    1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 3L, 
    1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 
    3L, 1L, 3L, 1L, 1L, 1L, 3L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L
    ), .Label = c("1st July 2010", "Valid From", " "), class = "factor"), 
    V4 = structure(c(1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
    ), .Label = c("Valid To", " "), class = "factor")), .Names = c("V1", 
"V2", "V3", "V4"), row.names = c(NA, -115L), class = "data.frame")), .Names = "NULL")

Upvotes: 1

Views: 302

Answers (1)

akrun
akrun

Reputation: 887128

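t1[3] is still a list of length one whose single element is the data frame, so lapply(t1[3], ...) calls the function once with the entire data frame as y. gsub then coerces that data frame with as.character, which deparses each factor column's underlying integer codes into one long string such as "c(84, 85, 1, ...)" – that is where the numbers come from. Instead, extract the data frame itself with t1[3][[1]] (equivalently t1[[3]]) and loop over its columns, applying gsub to each one; gsub converts each factor column to its character labels before substituting. The output will be a list of character vectors, which can be wrapped with data.frame(..., stringsAsFactors = FALSE) to get a data frame of strings back.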

lapply(t1[3][[1]], function(y) gsub("\\s+", " ", y))

Upvotes: 4

Related Questions