difft/
gitattributes.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
use std::path::Path;
use std::process::Command;

/// The result of checking a file's `diff` and `binary` attributes with `git check-attr`.
///
/// See `man gitattributes`, specifically the **Generating diff text** section.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum DiffAttribute {
    /// The file type (text vs binary) will be autodetected.
    ///
    /// This is the default setting, used when the `diff` attribute is not present. A
    /// `diff=<driver>` value is also reported as this variant, because custom diff drivers are
    /// not supported.
    Unspecified,

    /// The file should be treated as text.
    ///
    /// Corresponds to a set `diff` attribute, unless the `binary` attribute is also set.
    AssumeText,

    /// The file should be treated as binary, and no diff will be shown.
    /// Sometimes this option is set for generated text files.
    ///
    /// Corresponds to an explicitly unset `diff` attribute (`-diff`), or to the `binary`
    /// attribute being set.
    AssumeBinary,
}

/// Queries git for the `diff` and `binary` attributes of `path` by running
/// `git check-attr diff binary -z -- <path>` and parsing its output.
///
/// Returns [`Option::None`] when `git` could not be executed, when it exited with a non-zero
/// status (for example because the file is not inside a git repository), or when its output
/// could not be parsed.
pub(crate) fn check_diff_attr(path: &Path) -> Option<DiffAttribute> {
    let spawned = Command::new("git")
        .args(["check-attr", "diff", "binary", "-z", "--"])
        .arg(path)
        .output();

    let finished = match spawned {
        Ok(finished) => finished,
        Err(err) => {
            debug!("failed to execute git: {err}");
            return None;
        }
    };

    // A non-zero exit happens when git is run outside of a repository, and possibly in many
    // other situations. This check runs eagerly and the failure causes cannot be told apart
    // easily and reliably, so it is only worth a debug message.
    if !finished.status.success() {
        debug!(
            "git check-attr exited with status {}: \"{}\"",
            finished.status,
            finished.stderr.escape_ascii()
        );
        return None;
    }

    let stdout = &finished.stdout;
    debug!(
        "git check-attr output: {:?}",
        String::from_utf8_lossy(stdout)
    );

    let parsed = parse_output(stdout);
    match &parsed {
        Some(attr) => {
            trace!("git check-attr result: {:?}", attr);
        }
        None => {
            warn!(
                "malformed git check-attr output: \"{}\"",
                stdout.escape_ascii()
            );
        }
    }

    parsed
}

/// Parses the output of `git check-attr diff binary -z -- PATH`.
///
/// The `-z` output format is a sequence of records of the form
///
/// ```text
/// <path> NUL <attribute> NUL <info> NUL
/// ```
///
/// The output is assumed to describe a single file, so the path field is not inspected beyond
/// checking that it is non-empty.
///
/// Returns [`None`] when the output is malformed: a record is truncated, or an empty path field
/// is followed by more data. Empty output is reported as [`DiffAttribute::Unspecified`].
fn parse_output(output: &[u8]) -> Option<DiffAttribute> {
    let mut binary_set = false;
    let mut result = DiffAttribute::Unspecified;

    let mut fields = output.split(|&b| b == b'\0');
    while let Some(path) = fields.next() {
        if path.is_empty() {
            // `split` yields one empty field after the final NUL (and a single empty field for
            // empty input); that is the only legitimate empty "path". If anything follows it,
            // the output is malformed and must not be silently truncated.
            if fields.next().is_some() {
                return None;
            }
            break;
        }
        let attribute = fields.next()?;
        let info = fields.next()?;

        match attribute {
            b"diff" => match info {
                b"set" => result = DiffAttribute::AssumeText,
                b"unset" => result = DiffAttribute::AssumeBinary,
                // Either "unspecified" or the name of a custom diff driver (`diff=<driver>`).
                // Custom drivers are not supported, so both leave the result untouched.
                _ => (),
            },
            b"binary" => {
                if info == b"set" {
                    binary_set = true;
                }
            }
            _ => {
                warn!(
                    "unexpected attribute in git check-attr output: \"{}\"",
                    attribute.escape_ascii()
                );
            }
        }
    }

    if binary_set {
        // assume user doesn't want to see the diff, even if they specify diff=whatever
        result = DiffAttribute::AssumeBinary;
    }

    Some(result)
}

#[cfg(test)]
mod tests {
    use pretty_assertions::assert_eq;

    use super::*;

    #[test]
    fn test_parse_output() {
        // The test case assumes fictional git repository with the following .gitattributes
        // contents to ignore generated protobuf files, but see generated gRPC files, and uses
        // imaginary "hex" diff driver for wtf.bin files:
        //
        //     /protos/*.pb.go -diff
        //     /protos/*_grpc.pb.go diff
        //     wtf.bin binary diff=hex
        //
        // Every fixture follows the `git check-attr -z` format, in which each record repeats
        // the path: <path> NUL <attribute> NUL <info> NUL.

        // This is a plaintext file with no attributes set, so the file type is autodetected
        assert_eq!(
            parse_output(
                b"protos/difft.proto\x00diff\x00unspecified\x00\
                  protos/difft.proto\x00binary\x00unspecified\x00"
            ),
            Some(DiffAttribute::Unspecified)
        );

        // This is a generated file with diff attribute unset (we don't want to look into its diff)
        assert_eq!(
            parse_output(
                b"protos/difft.pb.go\x00diff\x00unset\x00\
                  protos/difft.pb.go\x00binary\x00unspecified\x00"
            ),
            Some(DiffAttribute::AssumeBinary)
        );

        // diff attribute is explicitly re-enabled for this file
        assert_eq!(
            parse_output(
                b"protos/difft_grpc.pb.go\x00diff\x00set\x00\
                  protos/difft_grpc.pb.go\x00binary\x00unspecified\x00"
            ),
            Some(DiffAttribute::AssumeText)
        );

        // Although diff=hex basically undoes the effect of binary macro attribute, we assume the
        // user doesn't want to see them here. difft doesn't run git diff drivers, after all.
        assert_eq!(
            parse_output(b"wtf.bin\x00diff\x00hex\x00wtf.bin\x00binary\x00set\x00"),
            Some(DiffAttribute::AssumeBinary)
        );
    }

    #[test]
    fn test_parse_output_truncated() {
        // A record that ends before its <info> field is malformed.
        assert_eq!(parse_output(b"protos/difft.proto\x00diff"), None);

        // The same applies when a later record is cut short.
        assert_eq!(
            parse_output(
                b"protos/difft.pb.go\x00diff\x00unset\x00\
                  protos/difft.pb.go\x00binary"
            ),
            None
        );
    }
}