Reputation: 11234
I have a log file having the below format from which I would like to extract ip, datetime and uri
and load into a table.
64.242.88.10 - - [07/Mar/2004:17:09:01 -0800] "GET /twiki/bin/search/Main/SearchResult?scope=text&search=Joris%20*Benschop[^A-Za-z] HTTP/1.1" 200 4284
I am able to do that by loading log file rows as a single string into table as below and by using regexp_extract
.
create table logs( line string);
load data local inpath '.../mylog.log' into table logs;
select
regexp_extract(line, '(.*) (- -) \\[(.*) -.*\\] \\"GET (.*)\\?',1),--ip
regexp_extract(line, '(.*) (- -) \\[(.*) -.*\\] \\"GET (.*)\\?',3),--datetime
regexp_extract(line, '(.*) (- -) \\[(.*) -.*\\] \\"GET (.*)\\?',4) --uri
from logs limit 10;
+---------------+-----------------------+--------------------------------------------+--+
| _c0 | _c1 | _c2 |
+---------------+-----------------------+--------------------------------------------+--+
| 64.242.88.10 | 07/Mar/2004:17:09:01 | /twiki/bin/search/Main/SearchResult |
| 64.242.88.10 | 07/Mar/2004:17:10:20 | /twiki/bin/oops/TWiki/TextFormattingRules |
+---------------+-----------------------+--------------------------------------------+--+
What I would like to do is to create a table specifying SerDe properties and load it without using regexp_extract
function. I tried the below and its not working.
create table logs (
ip string,
day timestamp,
url string)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
with serdeproperties ("input.regex" =
"(.*) [^- - \[](.*) [^-.*\]] \"([^GET].*\?)");
load data local inpath ".../mylog.log" into table logs;
I appreciate some help and direction.
Upvotes: 0
Views: 1507
Reputation: 44921
day
cannot be timestamp since it is not in ISO format (yyyy-MM-dd HH:mm:ss
)\\
).*
) create external table logs
(
ip string
,day string
,url string
)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
with serdeproperties ("input.regex" = "(\\S+).*?\\[(.*?)\\s.*?(/.*?)\\?.*")
;
select * from logs
;
+--------------+----------------------+-------------------------------------+
| ip | day | url |
+--------------+----------------------+-------------------------------------+
| 64.242.88.10 | 07/Mar/2004:17:09:01 | /twiki/bin/search/Main/SearchResult |
+--------------+----------------------+-------------------------------------+
Upvotes: 1