user2685761
user2685761

Reputation: 41

A strange result when parsing HTTP headers with the nom crate in Rust

I wrote some code that uses nom to parse HTTP headers, but I get strange results, as shown in the example below. In the main function, input1 differs from input2 only by one extra semicolon at the end, yet the two produce completely different results. I expected text(input1) to give a result similar to text(input2). Thanks for helping me figure out what is going on.

[dependencies]
nom = "6"
use nom::{
    branch::alt,
    bytes::complete::{tag, tag_no_case, take_until},
    character::complete::multispace0,
    combinator::opt,
    multi::separated_list0,
    sequence::tuple,
};
use nom::{AsChar, IResult, InputTakeAtPosition};
use std::collections::HashMap;
use std::fmt::Debug;

/// Result of parsing the `-H` / `--header` options found in the input.
///
/// Both maps borrow their keys and values from the parsed input string.
/// `Default` yields a header set with two empty maps.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Header<'a> {
    /// Cookie name/value pairs taken from the `Cookie` header.
    pub options_header_cookies: HashMap<&'a str, &'a str>,
    /// Every other header, keyed by header name.
    pub options_headers_more: HashMap<&'a str, &'a str>,
}

#[allow(dead_code)]
fn key(input: &str) -> IResult<&str, &str> {
    input.split_at_position_complete(|item| {
        !(item.is_alphanum() || item.as_char() == '-' || item.as_char() == '_')
    })
}

/// Parses a `;`-separated list of `key=value` cookie pairs into a map.
///
/// Each element is optional whitespace, a `key`, a literal `=`, the value,
/// and optional trailing whitespace.
#[allow(dead_code)]
fn cookie_pairs(input: &str) -> IResult<&str, HashMap<&str, &str>> {
    let (input, cookies) = separated_list0(
        tag(";"),
        tuple((
            multispace0,
            key,
            tag("="),
            // `alt` tries `take_until(";")` first, which matches up to the next
            // `;` anywhere in the remaining input; `take_until("'")` is only
            // tried when that first parser fails.
            alt((take_until(";"), take_until("'"))),
            multispace0,
        )),
    )(input)?;
    // Tuple positions 1 and 3 hold the key and the value.
    Ok((input, cookies.into_iter().map(|c| (c.1, c.3)).collect()))
}

/// Parses one `-H 'Cookie: ...'` (or `--header 'Cookie: ...'`) option and
/// returns its cookie pairs.
///
/// The header name is matched case-insensitively; the option flag is not.
#[allow(dead_code)]
fn options_header_cookie(input: &str) -> IResult<&str, HashMap<&str, &str>> {
    let flag = alt((tag("-H\x20"), tag("--header\x20")));
    let mut option = tuple((flag, tag_no_case("'cookie:\x20"), cookie_pairs, tag("'")));

    let (rest, (_flag, _name, jar, _quote)) = option(input)?;
    Ok((rest, jar))
}

/// Parses one generic `-H 'Name: value'` (or `--header 'Name: value'`) option.
///
/// Returns the header name and everything between `": "` and the closing
/// quote as the value.
#[allow(dead_code)]
fn options_header_(input: &str) -> IResult<&str, (&str, &str)> {
    let opening = alt((tag("-H\x20'"), tag("--header\x20'")));
    let mut option = tuple((opening, key, tag(":\x20"), take_until("'"), tag("'")));

    let (rest, (_open, name, _sep, value, _close)) = option(input)?;
    Ok((rest, (name, value)))
}

/// Parses a space-separated sequence of header options into a `Header`.
///
/// Each element may start with a backslash-newline continuation and
/// whitespace, followed by an optional cookie header and an optional generic
/// header. When an element yields a cookie header, it replaces any cookies
/// collected so far and the generic slot of that element is ignored.
fn text(input: &str) -> IResult<&str, Header> {
    let element = tuple((
        opt(tag("\\\n")),
        multispace0,
        tuple((opt(options_header_cookie), opt(options_header_))),
    ));
    let (rest, elements) = separated_list0(tag("\x20"), element)(input)?;

    let mut cookies = HashMap::new();
    let mut others = HashMap::new();
    for (_continuation, _space, found) in elements {
        match found {
            // A cookie header wins over whatever the second slot matched.
            (Some(jar), _) => cookies = jar,
            (None, Some((name, value))) => {
                others.insert(name, value);
            }
            (None, None) => {}
        }
    }

    Ok((
        rest,
        Header {
            options_header_cookies: cookies,
            options_headers_more: others,
        },
    ))
}

/// Pretty-prints any `Debug` value to stdout, prefixed with `=> `.
#[allow(dead_code)]
fn debug<T>(o: T)
where
    T: Debug,
{
    println!("=> {:#?}", o);
}

/// Runs the parser on two sample inputs and prints each result.
fn main() {
    // Same as `input2`, except for the `;` right before the final quote.
    let input1 = r#"
-H 'Cookie: NID=219=Ji47zdfV6mSKlkKmpVf8F67O80WTSw; DV=03-vBWQ2RBEqsNFUD5FEuieRJvkwrRfXaKa0v0Cj2wAAAAA' \
-H 'User-Agent: Mozilla/5.0 Macintosh;'"#;

    let input2 = r#"
-H 'Cookie: NID=219=Ji47zdfV6mSKlkKmpVf8F67O80WTSw; DV=03-vBWQ2RBEqsNFUD5FEuieRJvkwrRfXaKa0v0Cj2wAAAAA' \
-H 'User-Agent: Mozilla/5.0 Macintosh'"#;

    for &sample in [input1, input2].iter() {
        debug(text(sample));
    }
}

Upvotes: 2

Views: 398

Answers (1)

Svetlin Zarev
Svetlin Zarev

Reputation: 15663

The issue lies in your cookie_pairs() parser:

/// Parses a `;`-separated list of `key=value` cookie pairs into a map.
fn cookie_pairs(input: &str) -> IResult<&str, HashMap<&str, &str>> {
    let (input, cookies) = separated_list0(
        tag(";"),
        tuple((
            multispace0,
            key,
            tag("="),
            // Problematic line: `take_until(";")` is tried first and matches
            // up to the next `;` anywhere in the remaining input.
            alt((take_until(";"), take_until("'"))),
            multispace0,
        )),
    )(input)?;
    // Tuple positions 1 and 3 hold the key and the value.
    Ok((input, cookies.into_iter().map(|c| (c.1, c.3)).collect()))
}

The alt() combinator runs the first parser to completion and tries the second one only if the first fails:

alt((take_until(";"), take_until("'")))

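So when a ; appears later in the input (here, the trailing ; at the end of the User-Agent header), take_until(";") succeeds for the last cookie value and consumes everything up to that ;, running past the ' that closes the Cookie header. The parent parser then cannot match the closing ', so it fails and no cookies are returned.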

The fix is simple. Replace it with take_while (which also has to be imported from nom::bytes::complete):

take_while(|ch| ch != '\'' && ch != ';')

which will stop when either ' or ; appears and will not consume the whole input:

fn cookie_pairs(input: &str) -> IResult<&str, HashMap<&str, &str>> {
    let (input, cookies) = separated_list0(
        tag(";"),
        tuple((
            multispace0,
            key,
            tag("="),
            take_while(|ch| ch != '\'' && ch != ';'),
            multispace0,
        )),
    )(input)?;
    Ok((input, cookies.into_iter().map(|c| (c.1, c.3)).collect()))
}

Upvotes: 2

Related Questions