아파치 로그는 Common과
147.229.9.14 - - [28/May/2022:16:52:07 +0000] "GET /images/jordan-80.png HTTP/1.1" 304 45
Combined 포맷으로 나뉜다.
147.229.9.14 - - [28/May/2022:16:52:07 +0000] "GET /images/jordan-80.png HTTP/1.1" 304 45 "http://www.semicomplete.com/articles/dynamic-dns-with-dhcp/" "Mozilla/5.0 (Windows NT 5.1; rv:27.0) Gecko/20100101 Firefox/27.0"
filter { grok { match => {"message" => "%{COMMONAPACHELOG}"} }}
[2023-06-03T16:17:08,979][INFO ][logstash.agent ] Pipelines running {:count=>1, :running_pipelines=>[:main], :non_running_pipelines=>[]}
{ "ident" => "-", "timestamp" => "28/May/2022:16:52:07 +0000", "clientip" => "147.229.9.14", "bytes" => "45", "verb" => "GET", "request" => "/images/jordan-80.png", "response" => "304", "message" => "147.229.9.14 - - [28/May/2022:16:52:07 +0000] \"GET /images/jordan-80.png HTTP/1.1\" 304 45\r", "httpversion" => "1.1", "auth" => "-"}
Combined 로그는 grok 패턴 COMBINEDAPACHELOG로 기본 테이블 구조 완성 가능.
filter { grok { match => {"message" => "%{COMBINEDAPACHELOG}"} }}
[2023-06-03T16:18:52,957][INFO ][logstash.agent ] Pipelines running {:count=>1, :running_pipelines=>[:main], :non_running_pipelines=>[]}
{ "ident" => "-", "bytes" => "45", "clientip" => "147.229.9.14", "agent" => "\"Mozilla/5.0 (Windows NT 5.1; rv:27.0) Gecko/20100101 Firefox/27.0\"", "request" => "/images/jordan-80.png", "referrer" => "\"http://www.semicomplete.com/articles/dynamic-dns-with-dhcp/\"", "timestamp" => "28/May/2022:16:52:07 +0000", "verb" => "GET", "response" => "304", "message" => "147.229.9.14 - - [28/May/2022:16:52:07 +0000] \"GET /images/jordan-80.png HTTP/1.1\" 304 45 \"http://www.semicomplete.com/articles/dynamic-dns-with-dhcp/\" \"Mozilla/5.0 (Windows NT 5.1; rv:27.0) Gecko/20100101 Firefox/27.0\"\r", "httpversion" => "1.1", "auth" => "-"}
그런데 COMBINEDAPACHELOG 패턴 실행 결과를 보면 agent, referrer 필드값의 인용부호(")가 사라지지 않음을 알 수 있다. 해당 인용부호가 데이터 활용에 영향을 주지는 않는데 좀 거슬림(..)
인용부호 제거
보통은 dissect로 대충 자르고, grok로 다듬는 편인데 오늘은 좀 다르게 해보자. 커스텀 grok 정규표현식 패턴 준비.
filter { grok { patterns_dir => "d:/myregex.txt" match => {"message" => "%{COMBINEDAPACHELOG}"} }}
[2023-06-03T16:25:35,705][INFO ][logstash.agent ] Pipelines running {:count=>1, :running_pipelines=>[:main], :non_running_pipelines=>[]}
{ "clientip" => "147.229.9.14", "bytes" => "45", "timestamp" => "28/May/2022:16:52:07 +0000", "agent" => "Mozilla/5.0 (Windows NT 5.1; rv:27.0) Gecko/20100101 Firefox/27.0", "method" => "GET", "referrer" => "http://www.semicomplete.com/articles/dynamic-dns-with-dhcp/", "uri" => "/images/jordan-80.png", "message" => "147.229.9.14 - - [28/May/2022:16:52:07 +0000] \"GET /images/jordan-80.png HTTP/1.1\" 304 45 \"http://www.semicomplete.com/articles/dynamic-dns-with-dhcp/\" \"Mozilla/5.0 (Windows NT 5.1; rv:27.0) Gecko/20100101 Firefox/27.0\"\r", "status" => "304"}
아니면 내장된 grok 패턴 사용 후, 인용부호 제거 과정 추가.
filter { grok { match => {"message" => "%{COMBINEDAPACHELOG}"} }
dissect { mapping => {"agent" => '"%{agent}"'} mapping => {"referrer" => '"%{referrer}"'} }}
관련 글


댓글 없음:
댓글 쓰기