Refactor parseTreeEntries, speed up tree list (#21368)
Close #20315 (fix the panic when parsing invalid input). Speed up #20231 (use ls-tree without size field). Introduce ListEntriesRecursiveFast (ls-tree without size) and ListEntriesRecursiveWithSize (ls-tree with size).
This commit is contained in:
parent
69fc510d6d
commit
c08e42c47e
7 changed files with 109 additions and 53 deletions
|
@@ -22,70 +22,72 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
|
|||
return parseTreeEntries(data, nil)
|
||||
}
|
||||
|
||||
var sepSpace = []byte{' '}
|
||||
|
||||
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
|
||||
entries := make([]*TreeEntry, 0, 10)
|
||||
var err error
|
||||
entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
|
||||
for pos := 0; pos < len(data); {
|
||||
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
|
||||
// expect line to be of the form:
|
||||
// <mode> <type> <sha> <space-padded-size>\t<filename>
|
||||
// <mode> <type> <sha>\t<filename>
|
||||
posEnd := bytes.IndexByte(data[pos:], '\n')
|
||||
if posEnd == -1 {
|
||||
posEnd = len(data)
|
||||
} else {
|
||||
posEnd += pos
|
||||
}
|
||||
line := data[pos:posEnd]
|
||||
posTab := bytes.IndexByte(line, '\t')
|
||||
if posTab == -1 {
|
||||
return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line)
|
||||
}
|
||||
|
||||
entry := new(TreeEntry)
|
||||
entry.ptree = ptree
|
||||
if pos+6 > len(data) {
|
||||
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
|
||||
|
||||
entryAttrs := line[:posTab]
|
||||
entryName := line[posTab+1:]
|
||||
|
||||
entryMode, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
|
||||
_ /* entryType */, entryAttrs, _ = bytes.Cut(entryAttrs, sepSpace) // the type is not used, the mode is enough to determine the type
|
||||
entryObjectID, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
|
||||
if len(entryAttrs) > 0 {
|
||||
entrySize := entryAttrs // the last field is the space-padded-size
|
||||
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(entrySize)), 10, 64)
|
||||
entry.sized = true
|
||||
}
|
||||
switch string(data[pos : pos+6]) {
|
||||
|
||||
switch string(entryMode) {
|
||||
case "100644":
|
||||
entry.entryMode = EntryModeBlob
|
||||
pos += 12 // skip over "100644 blob "
|
||||
case "100755":
|
||||
entry.entryMode = EntryModeExec
|
||||
pos += 12 // skip over "100755 blob "
|
||||
case "120000":
|
||||
entry.entryMode = EntryModeSymlink
|
||||
pos += 12 // skip over "120000 blob "
|
||||
case "160000":
|
||||
entry.entryMode = EntryModeCommit
|
||||
pos += 14 // skip over "160000 object "
|
||||
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
|
||||
entry.entryMode = EntryModeTree
|
||||
pos += 12 // skip over "040000 tree "
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown type: %v", string(data[pos:pos+6]))
|
||||
return nil, fmt.Errorf("unknown type: %v", string(entryMode))
|
||||
}
|
||||
|
||||
if pos+40 > len(data) {
|
||||
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
|
||||
}
|
||||
id, err := NewIDFromString(string(data[pos : pos+40]))
|
||||
entry.ID, err = NewIDFromString(string(entryObjectID))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
|
||||
}
|
||||
entry.ID = id
|
||||
pos += 41 // skip over sha and trailing space
|
||||
|
||||
end := pos + bytes.IndexByte(data[pos:], '\t')
|
||||
if end < pos {
|
||||
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
|
||||
}
|
||||
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
|
||||
entry.sized = true
|
||||
|
||||
pos = end + 1
|
||||
|
||||
end = pos + bytes.IndexByte(data[pos:], '\n')
|
||||
if end < pos {
|
||||
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
|
||||
return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
|
||||
}
|
||||
|
||||
// In case entry name is surrounded by double quotes(it happens only in git-shell).
|
||||
if data[pos] == '"' {
|
||||
entry.name, err = strconv.Unquote(string(data[pos:end]))
|
||||
if len(entryName) > 0 && entryName[0] == '"' {
|
||||
entry.name, err = strconv.Unquote(string(entryName))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
|
||||
return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
|
||||
}
|
||||
} else {
|
||||
entry.name = string(data[pos:end])
|
||||
entry.name = string(entryName)
|
||||
}
|
||||
|
||||
pos = end + 1
|
||||
pos = posEnd + 1
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
return entries, nil
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue