Source file src/cmd/go/internal/cache/cache.go

     1  // Copyright 2017 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package cache implements a build artifact cache.
     6  package cache
     7  
     8  import (
     9  	"bytes"
    10  	"crypto/sha256"
    11  	"encoding/hex"
    12  	"errors"
    13  	"fmt"
    14  	"internal/godebug"
    15  	"io"
    16  	"io/fs"
    17  	"os"
    18  	"path/filepath"
    19  	"strconv"
    20  	"strings"
    21  	"time"
    22  
    23  	"cmd/go/internal/base"
    24  	"cmd/go/internal/lockedfile"
    25  	"cmd/go/internal/mmap"
    26  )
    27  
// An ActionID is a cache action key, the hash of a complete description of a
// repeatable computation (command line, environment variables,
// input file contents, executable contents).
//
// It is stored as a fixed-size array of HashSize bytes.
type ActionID [HashSize]byte
    32  
// An OutputID is a cache output key, the hash of an output of a computation.
//
// It is stored as a fixed-size array of HashSize bytes.
type OutputID [HashSize]byte
    35  
// Cache is the interface to the build cache as used by cmd/go.
type Cache interface {
	// Get returns the cache entry for the provided ActionID.
	// On miss, the error type should be of type *entryNotFoundError.
	//
	// After a successful call to Get, OutputFile(Entry.OutputID) must
	// exist on disk until Close is called (at the end of the process).
	Get(ActionID) (Entry, error)

	// Put adds an item to the cache.
	//
	// The seeker is only used to seek to the beginning. After a call to Put,
	// the seek position is not guaranteed to be in any particular state.
	//
	// As a special case, if the ReadSeeker is of type noVerifyReadSeeker,
	// the verification from GODEBUG=gocacheverify=1 is skipped.
	//
	// After a successful call to Put, OutputFile(OutputID) must
	// exist on disk until Close is called (at the end of the process).
	Put(ActionID, io.ReadSeeker) (_ OutputID, size int64, _ error)

	// Close is called at the end of the go process. Implementations can do
	// cache cleanup work at this phase, or wait for and report any errors from
	// background cleanup work started earlier. Any cache trimming in one
	// process should not cause the invariants of this interface to be
	// violated in another process. Namely, a cache trim from one process should
	// not delete an OutputID from disk that was recently Get or Put from
	// another process. As a rule of thumb, don't trim things used in the last
	// day.
	Close() error

	// OutputFile returns the path on disk where OutputID is stored.
	//
	// It's only called after a successful get or put call so it doesn't need
	// to return an error; it's assumed that if the previous get or put succeeded,
	// it's already on disk.
	OutputFile(OutputID) string

	// FuzzDir returns where fuzz files are stored.
	FuzzDir() string
}
    77  
// A DiskCache is a package cache, backed by a file system directory tree.
type DiskCache struct {
	dir string           // root directory of the cache
	now func() time.Time // clock used for mtimes and trimming; Open sets it to time.Now
}
    83  
    84  // Open opens and returns the cache in the given directory.
    85  //
    86  // It is safe for multiple processes on a single machine to use the
    87  // same cache directory in a local file system simultaneously.
    88  // They will coordinate using operating system file locks and may
    89  // duplicate effort but will not corrupt the cache.
    90  //
    91  // However, it is NOT safe for multiple processes on different machines
    92  // to share a cache directory (for example, if the directory were stored
    93  // in a network file system). File locking is notoriously unreliable in
    94  // network file systems and may not suffice to protect the cache.
    95  func Open(dir string) (*DiskCache, error) {
    96  	info, err := os.Stat(dir)
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  	if !info.IsDir() {
   101  		return nil, &fs.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
   102  	}
   103  	for i := 0; i < 256; i++ {
   104  		name := filepath.Join(dir, fmt.Sprintf("%02x", i))
   105  		if err := os.MkdirAll(name, 0o777); err != nil {
   106  			return nil, err
   107  		}
   108  	}
   109  	c := &DiskCache{
   110  		dir: dir,
   111  		now: time.Now,
   112  	}
   113  	return c, nil
   114  }
   115  
   116  // fileName returns the name of the file corresponding to the given id.
   117  func (c *DiskCache) fileName(id [HashSize]byte, key string) string {
   118  	return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
   119  }
   120  
   121  // An entryNotFoundError indicates that a cache entry was not found, with an
   122  // optional underlying reason.
   123  type entryNotFoundError struct {
   124  	Err error
   125  }
   126  
   127  func (e *entryNotFoundError) Error() string {
   128  	if e.Err == nil {
   129  		return "cache entry not found"
   130  	}
   131  	return fmt.Sprintf("cache entry not found: %v", e.Err)
   132  }
   133  
   134  func (e *entryNotFoundError) Unwrap() error {
   135  	return e.Err
   136  }
   137  
const (
	// An action entry file is a single line of the form
	//	"v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
	//
	// hexSize is the length of one hex-encoded hash. entrySize is the exact
	// length in bytes of an entry line: the two-byte "v1" tag, two hex
	// hashes, two 20-byte decimal fields, the four spaces separating them,
	// and the trailing newline.
	hexSize   = HashSize * 2
	entrySize = 2 + 1 + hexSize + 1 + hexSize + 1 + 20 + 1 + 20 + 1
)
   143  
// verify controls whether to run the cache in verify mode.
// In verify mode, Get always reports a miss (an *entryNotFoundError
// wrapping errVerifyMode), but Put then double-checks that the data
// being written exactly matches any existing entry. This provides an easy
// way to detect program behavior that would have been different
// had the cache entry been returned from Get.
//
// verify is enabled by setting the environment variable
// GODEBUG=gocacheverify=1.
var verify = false

// errVerifyMode is the reason attached to the miss that Get reports
// while verify mode is enabled.
var errVerifyMode = errors.New("gocacheverify=1")

// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
var DebugTest = false

// init applies the GODEBUG cache settings at program start.
func init() { initEnv() }

// GODEBUG settings consulted by initEnv.
var (
	gocacheverify = godebug.New("gocacheverify")
	gocachehash   = godebug.New("gocachehash")
	gocachetest   = godebug.New("gocachetest")
)
   167  
   168  func initEnv() {
   169  	if gocacheverify.Value() == "1" {
   170  		gocacheverify.IncNonDefault()
   171  		verify = true
   172  	}
   173  	if gocachehash.Value() == "1" {
   174  		gocachehash.IncNonDefault()
   175  		debugHash = true
   176  	}
   177  	if gocachetest.Value() == "1" {
   178  		gocachetest.IncNonDefault()
   179  		DebugTest = true
   180  	}
   181  }
   182  
   183  // Get looks up the action ID in the cache,
   184  // returning the corresponding output ID and file size, if any.
   185  // Note that finding an output ID does not guarantee that the
   186  // saved file for that output ID is still available.
   187  func (c *DiskCache) Get(id ActionID) (Entry, error) {
   188  	if verify {
   189  		return Entry{}, &entryNotFoundError{Err: errVerifyMode}
   190  	}
   191  	return c.get(id)
   192  }
   193  
// An Entry is the information recorded in the cache index for one action:
// which output the action produced, how large it is, and when it was recorded.
type Entry struct {
	OutputID OutputID  // hash identifying the cached output
	Size     int64     // size of the output in bytes
	Time     time.Time // when added to cache
}
   199  
   200  // get is Get but does not respect verify mode, so that Put can use it.
   201  func (c *DiskCache) get(id ActionID) (Entry, error) {
   202  	missing := func(reason error) (Entry, error) {
   203  		return Entry{}, &entryNotFoundError{Err: reason}
   204  	}
   205  	f, err := os.Open(c.fileName(id, "a"))
   206  	if err != nil {
   207  		return missing(err)
   208  	}
   209  	defer f.Close()
   210  	entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
   211  	if n, err := io.ReadFull(f, entry); n > entrySize {
   212  		return missing(errors.New("too long"))
   213  	} else if err != io.ErrUnexpectedEOF {
   214  		if err == io.EOF {
   215  			return missing(errors.New("file is empty"))
   216  		}
   217  		return missing(err)
   218  	} else if n < entrySize {
   219  		return missing(errors.New("entry file incomplete"))
   220  	}
   221  	if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
   222  		return missing(errors.New("invalid header"))
   223  	}
   224  	eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
   225  	eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
   226  	esize, entry := entry[1:1+20], entry[1+20:]
   227  	etime, entry := entry[1:1+20], entry[1+20:]
   228  	var buf [HashSize]byte
   229  	if _, err := hex.Decode(buf[:], eid); err != nil {
   230  		return missing(fmt.Errorf("decoding ID: %v", err))
   231  	} else if buf != id {
   232  		return missing(errors.New("mismatched ID"))
   233  	}
   234  	if _, err := hex.Decode(buf[:], eout); err != nil {
   235  		return missing(fmt.Errorf("decoding output ID: %v", err))
   236  	}
   237  	i := 0
   238  	for i < len(esize) && esize[i] == ' ' {
   239  		i++
   240  	}
   241  	size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
   242  	if err != nil {
   243  		return missing(fmt.Errorf("parsing size: %v", err))
   244  	} else if size < 0 {
   245  		return missing(errors.New("negative size"))
   246  	}
   247  	i = 0
   248  	for i < len(etime) && etime[i] == ' ' {
   249  		i++
   250  	}
   251  	tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
   252  	if err != nil {
   253  		return missing(fmt.Errorf("parsing timestamp: %v", err))
   254  	} else if tm < 0 {
   255  		return missing(errors.New("negative timestamp"))
   256  	}
   257  
   258  	c.markUsed(c.fileName(id, "a"))
   259  
   260  	return Entry{buf, size, time.Unix(0, tm)}, nil
   261  }
   262  
   263  // GetFile looks up the action ID in the cache and returns
   264  // the name of the corresponding data file.
   265  func GetFile(c Cache, id ActionID) (file string, entry Entry, err error) {
   266  	entry, err = c.Get(id)
   267  	if err != nil {
   268  		return "", Entry{}, err
   269  	}
   270  	file = c.OutputFile(entry.OutputID)
   271  	info, err := os.Stat(file)
   272  	if err != nil {
   273  		return "", Entry{}, &entryNotFoundError{Err: err}
   274  	}
   275  	if info.Size() != entry.Size {
   276  		return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
   277  	}
   278  	return file, entry, nil
   279  }
   280  
   281  // GetBytes looks up the action ID in the cache and returns
   282  // the corresponding output bytes.
   283  // GetBytes should only be used for data that can be expected to fit in memory.
   284  func GetBytes(c Cache, id ActionID) ([]byte, Entry, error) {
   285  	entry, err := c.Get(id)
   286  	if err != nil {
   287  		return nil, entry, err
   288  	}
   289  	data, _ := os.ReadFile(c.OutputFile(entry.OutputID))
   290  	if sha256.Sum256(data) != entry.OutputID {
   291  		return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")}
   292  	}
   293  	return data, entry, nil
   294  }
   295  
   296  // GetMmap looks up the action ID in the cache and returns
   297  // the corresponding output bytes.
   298  // GetMmap should only be used for data that can be expected to fit in memory.
   299  // The boolean result indicates whether the file was opened.
   300  // If it is true, the caller should avoid attempting
   301  // to write to the file on Windows, because Windows locks
   302  // the open file, and writes to it will fail.
   303  func GetMmap(c Cache, id ActionID) ([]byte, Entry, bool, error) {
   304  	entry, err := c.Get(id)
   305  	if err != nil {
   306  		return nil, entry, false, err
   307  	}
   308  	md, opened, err := mmap.Mmap(c.OutputFile(entry.OutputID))
   309  	if err != nil {
   310  		return nil, Entry{}, opened, err
   311  	}
   312  	if int64(len(md.Data)) != entry.Size {
   313  		return nil, Entry{}, true, &entryNotFoundError{Err: errors.New("file incomplete")}
   314  	}
   315  	return md.Data, entry, true, nil
   316  }
   317  
   318  // OutputFile returns the name of the cache file storing output with the given OutputID.
   319  func (c *DiskCache) OutputFile(out OutputID) string {
   320  	file := c.fileName(out, "d")
   321  	isDir := c.markUsed(file)
   322  	if isDir { // => cached executable
   323  		entries, err := os.ReadDir(file)
   324  		if err != nil {
   325  			return fmt.Sprintf("DO NOT USE - missing binary cache entry: %v", err)
   326  		}
   327  		if len(entries) != 1 {
   328  			return "DO NOT USE - invalid binary cache entry"
   329  		}
   330  		return filepath.Join(file, entries[0].Name())
   331  	}
   332  	return file
   333  }
   334  
// Time constants for cache expiration.
//
// We set the mtime on a cache file on each use, but at most once per mtimeInterval (1 hour),
// to avoid causing many unnecessary inode updates. The mtimes therefore
// roughly reflect "time of last use" but may in fact be older by at most an hour.
//
// We scan the cache for entries to delete at most once per trimInterval (1 day).
//
// When we do scan the cache, we delete entries that have not been used for
// at least trimLimit (5 days). Statistics gathered from a month of usage by
// Go developers found that essentially all reuse of cached entries happened
// within 5 days of the previous reuse. See golang.org/issue/22990.
const (
	mtimeInterval = 1 * time.Hour
	trimInterval  = 24 * time.Hour
	trimLimit     = 5 * 24 * time.Hour
)
   352  
   353  // markUsed makes a best-effort attempt to update mtime on file,
   354  // so that mtime reflects cache access time.
   355  //
   356  // Because the reflection only needs to be approximate,
   357  // and to reduce the amount of disk activity caused by using
   358  // cache entries, used only updates the mtime if the current
   359  // mtime is more than an hour old. This heuristic eliminates
   360  // nearly all of the mtime updates that would otherwise happen,
   361  // while still keeping the mtimes useful for cache trimming.
   362  //
   363  // markUsed reports whether the file is a directory (an executable cache entry).
   364  func (c *DiskCache) markUsed(file string) (isDir bool) {
   365  	info, err := os.Stat(file)
   366  	if err != nil {
   367  		return false
   368  	}
   369  	if now := c.now(); now.Sub(info.ModTime()) >= mtimeInterval {
   370  		os.Chtimes(file, now, now)
   371  	}
   372  	return info.IsDir()
   373  }
   374  
// Close trims the cache by calling Trim and returns Trim's result.
func (c *DiskCache) Close() error { return c.Trim() }
   376  
   377  // Trim removes old cache entries that are likely not to be reused.
   378  func (c *DiskCache) Trim() error {
   379  	now := c.now()
   380  
   381  	// We maintain in dir/trim.txt the time of the last completed cache trim.
   382  	// If the cache has been trimmed recently enough, do nothing.
   383  	// This is the common case.
   384  	// If the trim file is corrupt, detected if the file can't be parsed, or the
   385  	// trim time is too far in the future, attempt the trim anyway. It's possible that
   386  	// the cache was full when the corruption happened. Attempting a trim on
   387  	// an empty cache is cheap, so there wouldn't be a big performance hit in that case.
   388  	skipTrim := func(data []byte) bool {
   389  		if t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64); err == nil {
   390  			lastTrim := time.Unix(t, 0)
   391  			if d := now.Sub(lastTrim); d < trimInterval && d > -mtimeInterval {
   392  				return true
   393  			}
   394  		}
   395  		return false
   396  	}
   397  	// Check to see if we need a trim. Do this check separately from the lockedfile.Transform
   398  	// so that we can skip getting an exclusive lock in the common case.
   399  	if data, err := lockedfile.Read(filepath.Join(c.dir, "trim.txt")); err == nil {
   400  		if skipTrim(data) {
   401  			return nil
   402  		}
   403  	}
   404  
   405  	errFileChanged := errors.New("file changed")
   406  
   407  	// Write the new timestamp before we start trimming to reduce the chance that multiple invocations
   408  	// try to trim at the same time, causing contention in CI (#76314).
   409  	err := lockedfile.Transform(filepath.Join(c.dir, "trim.txt"), func(data []byte) ([]byte, error) {
   410  		if skipTrim(data) {
   411  			// The timestamp in the file no longer meets the criteria for us to
   412  			// do a trim. It must have been updated by another go command invocation
   413  			// since we last read it. Skip the trim.
   414  			return nil, errFileChanged
   415  		}
   416  		return fmt.Appendf(nil, "%d", now.Unix()), nil
   417  	})
   418  	if errors.Is(err, errors.ErrUnsupported) {
   419  		return err
   420  	}
   421  	if errors.Is(err, errFileChanged) {
   422  		// Skip the trim because we don't need it anymore.
   423  		return nil
   424  	}
   425  
   426  	// Trim each of the 256 subdirectories.
   427  	// We subtract an additional mtimeInterval
   428  	// to account for the imprecision of our "last used" mtimes.
   429  	cutoff := now.Add(-trimLimit - mtimeInterval)
   430  	for i := 0; i < 256; i++ {
   431  		subdir := filepath.Join(c.dir, fmt.Sprintf("%02x", i))
   432  		c.trimSubdir(subdir, cutoff)
   433  	}
   434  
   435  	return nil
   436  }
   437  
   438  // trimSubdir trims a single cache subdirectory.
   439  func (c *DiskCache) trimSubdir(subdir string, cutoff time.Time) {
   440  	// Read all directory entries from subdir before removing
   441  	// any files, in case removing files invalidates the file offset
   442  	// in the directory scan. Also, ignore error from f.Readdirnames,
   443  	// because we don't care about reporting the error and we still
   444  	// want to process any entries found before the error.
   445  	f, err := os.Open(subdir)
   446  	if err != nil {
   447  		return
   448  	}
   449  	names, _ := f.Readdirnames(-1)
   450  	f.Close()
   451  
   452  	for _, name := range names {
   453  		// Remove only cache entries (xxxx-a and xxxx-d).
   454  		if !strings.HasSuffix(name, "-a") && !strings.HasSuffix(name, "-d") {
   455  			continue
   456  		}
   457  		entry := filepath.Join(subdir, name)
   458  		info, err := os.Stat(entry)
   459  		if err == nil && info.ModTime().Before(cutoff) {
   460  			if info.IsDir() { // executable cache entry
   461  				os.RemoveAll(entry)
   462  				continue
   463  			}
   464  			os.Remove(entry)
   465  		}
   466  	}
   467  }
   468  
   469  // putIndexEntry adds an entry to the cache recording that executing the action
   470  // with the given id produces an output with the given output id (hash) and size.
   471  func (c *DiskCache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify bool) error {
   472  	// Note: We expect that for one reason or another it may happen
   473  	// that repeating an action produces a different output hash
   474  	// (for example, if the output contains a time stamp or temp dir name).
   475  	// While not ideal, this is also not a correctness problem, so we
   476  	// don't make a big deal about it. In particular, we leave the action
   477  	// cache entries writable specifically so that they can be overwritten.
   478  	//
   479  	// Setting GODEBUG=gocacheverify=1 does make a big deal:
   480  	// in verify mode we are double-checking that the cache entries
   481  	// are entirely reproducible. As just noted, this may be unrealistic
   482  	// in some cases but the check is also useful for shaking out real bugs.
   483  	entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
   484  	if verify && allowVerify {
   485  		old, err := c.get(id)
   486  		if err == nil && (old.OutputID != out || old.Size != size) {
   487  			// panic to show stack trace, so we can see what code is generating this cache entry.
   488  			msg := fmt.Sprintf("go: internal cache error: cache verify failed: id=%x changed:<<<\n%s\n>>>\nold: %x %d\nnew: %x %d", id, reverseHash(id), out, size, old.OutputID, old.Size)
   489  			panic(msg)
   490  		}
   491  	}
   492  	file := c.fileName(id, "a")
   493  
   494  	// Copy file to cache directory.
   495  	mode := os.O_WRONLY | os.O_CREATE
   496  	f, err := os.OpenFile(file, mode, 0o666)
   497  	if err != nil {
   498  		return err
   499  	}
   500  	_, err = f.WriteString(entry)
   501  	if err == nil {
   502  		// Truncate the file only *after* writing it.
   503  		// (This should be a no-op, but truncate just in case of previous corruption.)
   504  		//
   505  		// This differs from os.WriteFile, which truncates to 0 *before* writing
   506  		// via os.O_TRUNC. Truncating only after writing ensures that a second write
   507  		// of the same content to the same file is idempotent, and does not — even
   508  		// temporarily! — undo the effect of the first write.
   509  		err = f.Truncate(int64(len(entry)))
   510  	}
   511  	if closeErr := f.Close(); err == nil {
   512  		err = closeErr
   513  	}
   514  	if err != nil {
   515  		// TODO(bcmills): This Remove potentially races with another go command writing to file.
   516  		// Can we eliminate it?
   517  		os.Remove(file)
   518  		return err
   519  	}
   520  	os.Chtimes(file, c.now(), c.now()) // mainly for tests
   521  
   522  	return nil
   523  }
   524  
// noVerifyReadSeeker is an io.ReadSeeker wrapper sentinel type
// that says that Cache.Put should skip the verify check
// (from GODEBUG=gocacheverify=1).
type noVerifyReadSeeker struct {
	io.ReadSeeker
}
   531  
   532  // Put stores the given output in the cache as the output for the action ID.
   533  // It may read file twice. The content of file must not change between the two passes.
   534  func (c *DiskCache) Put(id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
   535  	wrapper, isNoVerify := file.(noVerifyReadSeeker)
   536  	if isNoVerify {
   537  		file = wrapper.ReadSeeker
   538  	}
   539  	return c.put(id, "", file, !isNoVerify)
   540  }
   541  
   542  // PutExecutable is used to store the output as the output for the action ID into a
   543  // file with the given base name, with the executable mode bit set.
   544  // It may read file twice. The content of file must not change between the two passes.
   545  func (c *DiskCache) PutExecutable(id ActionID, name string, file io.ReadSeeker) (OutputID, int64, error) {
   546  	if name == "" {
   547  		panic("PutExecutable called without a name")
   548  	}
   549  	wrapper, isNoVerify := file.(noVerifyReadSeeker)
   550  	if isNoVerify {
   551  		file = wrapper.ReadSeeker
   552  	}
   553  	return c.put(id, name, file, !isNoVerify)
   554  }
   555  
// PutNoVerify is like Put but disables the verify check
// when GODEBUG=gocacheverify=1 is set.
// It is meant for data that is OK to cache but that we expect to vary slightly from run to run,
// like test output containing times and the like.
//
// It does so by passing file to c.Put wrapped in a noVerifyReadSeeker.
func PutNoVerify(c Cache, id ActionID, file io.ReadSeeker) (OutputID, int64, error) {
	return c.Put(id, noVerifyReadSeeker{file})
}
   563  
   564  func (c *DiskCache) put(id ActionID, executableName string, file io.ReadSeeker, allowVerify bool) (OutputID, int64, error) {
   565  	// Compute output ID.
   566  	h := sha256.New()
   567  	if _, err := file.Seek(0, 0); err != nil {
   568  		return OutputID{}, 0, err
   569  	}
   570  	size, err := io.Copy(h, file)
   571  	if err != nil {
   572  		return OutputID{}, 0, err
   573  	}
   574  	var out OutputID
   575  	h.Sum(out[:0])
   576  
   577  	// Copy to cached output file (if not already present).
   578  	fileMode := fs.FileMode(0o666)
   579  	if executableName != "" {
   580  		fileMode = 0o777
   581  	}
   582  	if err := c.copyFile(file, executableName, out, size, fileMode); err != nil {
   583  		return out, size, err
   584  	}
   585  
   586  	// Add to cache index.
   587  	return out, size, c.putIndexEntry(id, out, size, allowVerify)
   588  }
   589  
   590  // PutBytes stores the given bytes in the cache as the output for the action ID.
   591  func PutBytes(c Cache, id ActionID, data []byte) error {
   592  	_, _, err := c.Put(id, bytes.NewReader(data))
   593  	return err
   594  }
   595  
   596  // copyFile copies file into the cache, expecting it to have the given
   597  // output ID and size, if that file is not present already.
   598  func (c *DiskCache) copyFile(file io.ReadSeeker, executableName string, out OutputID, size int64, perm os.FileMode) error {
   599  	name := c.fileName(out, "d") // TODO(matloob): use a different suffix for the executable cache?
   600  	info, err := os.Stat(name)
   601  	if executableName != "" {
   602  		// This is an executable file. The file at name won't hold the output itself, but will
   603  		// be a directory that holds the output, named according to executableName. Check to see
   604  		// if the directory already exists, and if it does not, create it. Then reset name
   605  		// to the name we want the output written to.
   606  		if err != nil {
   607  			if !os.IsNotExist(err) {
   608  				return err
   609  			}
   610  			if err := os.Mkdir(name, 0o777); err != nil {
   611  				return err
   612  			}
   613  			if info, err = os.Stat(name); err != nil {
   614  				return err
   615  			}
   616  		}
   617  		if !info.IsDir() {
   618  			return errors.New("internal error: invalid binary cache entry: not a directory")
   619  		}
   620  
   621  		// directory exists. now set name to the inner file
   622  		name = filepath.Join(name, executableName)
   623  		info, err = os.Stat(name)
   624  	}
   625  	if err == nil && info.Size() == size {
   626  		// Check hash.
   627  		if f, err := os.Open(name); err == nil {
   628  			h := sha256.New()
   629  			io.Copy(h, f)
   630  			f.Close()
   631  			var out2 OutputID
   632  			h.Sum(out2[:0])
   633  			if out == out2 {
   634  				return nil
   635  			}
   636  		}
   637  		// Hash did not match. Fall through and rewrite file.
   638  	}
   639  
   640  	// Copy file to cache directory.
   641  	mode := os.O_RDWR | os.O_CREATE
   642  	if err == nil && info.Size() > size { // shouldn't happen but fix in case
   643  		mode |= os.O_TRUNC
   644  	}
   645  	f, err := os.OpenFile(name, mode, perm)
   646  	if err != nil {
   647  		if base.IsETXTBSY(err) {
   648  			// This file is being used by an executable. It must have
   649  			// already been written by another go process and then run.
   650  			// return without an error.
   651  			return nil
   652  		}
   653  		return err
   654  	}
   655  	defer f.Close()
   656  	if size == 0 {
   657  		// File now exists with correct size.
   658  		// Only one possible zero-length file, so contents are OK too.
   659  		// Early return here makes sure there's a "last byte" for code below.
   660  		return nil
   661  	}
   662  
   663  	// From here on, if any of the I/O writing the file fails,
   664  	// we make a best-effort attempt to truncate the file f
   665  	// before returning, to avoid leaving bad bytes in the file.
   666  
   667  	// Copy file to f, but also into h to double-check hash.
   668  	if _, err := file.Seek(0, 0); err != nil {
   669  		f.Truncate(0)
   670  		return err
   671  	}
   672  	h := sha256.New()
   673  	w := io.MultiWriter(f, h)
   674  	if _, err := io.CopyN(w, file, size-1); err != nil {
   675  		f.Truncate(0)
   676  		return err
   677  	}
   678  	// Check last byte before writing it; writing it will make the size match
   679  	// what other processes expect to find and might cause them to start
   680  	// using the file.
   681  	buf := make([]byte, 1)
   682  	if _, err := file.Read(buf); err != nil {
   683  		f.Truncate(0)
   684  		return err
   685  	}
   686  	h.Write(buf)
   687  	sum := h.Sum(nil)
   688  	if !bytes.Equal(sum, out[:]) {
   689  		f.Truncate(0)
   690  		return fmt.Errorf("file content changed underfoot")
   691  	}
   692  
   693  	// Commit cache file entry.
   694  	if _, err := f.Write(buf); err != nil {
   695  		f.Truncate(0)
   696  		return err
   697  	}
   698  	if err := f.Close(); err != nil {
   699  		// Data might not have been written,
   700  		// but file may look like it is the right size.
   701  		// To be extra careful, remove cached file.
   702  		os.Remove(name)
   703  		return err
   704  	}
   705  	os.Chtimes(name, c.now(), c.now()) // mainly for tests
   706  
   707  	return nil
   708  }
   709  
// FuzzDir returns a subdirectory within the cache for storing fuzzing data:
// the "fuzz" directory directly under the cache root.
// The subdirectory may not exist; FuzzDir does not create it.
//
// This directory is managed by the internal/fuzz package. Files in this
// directory aren't removed by the 'go clean -cache' command or by Trim.
// They may be removed with 'go clean -fuzzcache'.
//
// TODO(#48526): make Trim remove unused files from this directory.
func (c *DiskCache) FuzzDir() string {
	return filepath.Join(c.dir, "fuzz")
}
   721  

View as plain text