From 85f6df458be08be7f3c637db2312a599a4c7ee74 Mon Sep 17 00:00:00 2001 From: Christian Oder Date: Wed, 27 Sep 2017 01:09:29 +0200 Subject: [PATCH] Allow reading FileInfo from a dummy file instead of the file itself The system is using a hybrid mode. This means that if a file turns out to be an invalid JSON file, it will be handled "normally" --- README.md | 1 + config/config.go | 2 ++ mirrorbits.conf | 1 + scan/scan.go | 83 ++++++++++++++++++++++++++++++++++++++---------- 4 files changed, 71 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index c9a3a6b1..5c930ab2 100644 --- a/README.md +++ b/README.md @@ -102,6 +102,7 @@ DisallowRedirects | Disable any mirror trying to do an HTTP redirect WeightDistributionRange | Multiplier of the distance to the first mirror to find other possible mirrors in order to distribute the load DisableOnMissingFile | Disable a mirror if an advertised file on rsync/ftp appears to be missing on HTTP MaxLinkHeaders | Amount of backup mirror locations returned in HTTP headers +DummyFiles | Allows reading file information from a dummy JSON file. This allows saving storage on the host. Fallbacks | A list of possible mirrors to use as fallback if a request fails or if the database is unreachable. **These mirrors are not tracked by mirrorbits.** It is assumed they have all the files available in the local repository. 
## Running diff --git a/config/config.go b/config/config.go index 3e7f79c2..f62dcdea 100644 --- a/config/config.go +++ b/config/config.go @@ -35,6 +35,7 @@ var ( CheckInterval: 1, RepositoryScanInterval: 5, MaxLinkHeaders: 10, + DummyFiles: false, Hashes: hashing{ SHA1: true, SHA256: false, @@ -70,6 +71,7 @@ type Configuration struct { CheckInterval int `yaml:"CheckInterval"` RepositoryScanInterval int `yaml:"RepositoryScanInterval"` MaxLinkHeaders int `yaml:"MaxLinkHeaders"` + DummyFiles bool `yaml:"DummyFiles"` Hashes hashing `yaml:"Hashes"` DisallowRedirects bool `yaml:"DisallowRedirects"` WeightDistributionRange float32 `yaml:"WeightDistributionRange"` diff --git a/mirrorbits.conf b/mirrorbits.conf index 8cd6bfd4..895c6220 100644 --- a/mirrorbits.conf +++ b/mirrorbits.conf @@ -21,6 +21,7 @@ ConcurrentSync: 5 ScanInterval: 30 CheckInterval: 1 RepositoryScanInterval: 5 +DummyFiles: false Hashes: SHA256: On SHA1: Off diff --git a/scan/scan.go b/scan/scan.go index 56f8b210..38948155 100644 --- a/scan/scan.go +++ b/scan/scan.go @@ -4,6 +4,7 @@ package scan import ( + "encoding/json" "errors" "fmt" "os" @@ -62,6 +63,14 @@ type scan struct { count uint } +type DummyFile struct { + Size int64 `json:"Size"` + ModTime string `json:"ModTime"` + Sha1 string `json:"Sha1"` + Sha256 string `json:"Sha256"` + Md5 string `json:"Md5"` +} + // IsScanning returns true is a scan is already in progress for the given mirror func IsScanning(conn redis.Conn, identifier string) (bool, error) { return redis.Bool(conn.Do("EXISTS", fmt.Sprintf("SCANNING_%s", identifier))) @@ -229,6 +238,7 @@ func (s *scan) setLastSync(conn redis.Conn, identifier string, successful bool) } type sourcescanner struct { + dummyFile bool } // Walk inside the source/reference repository @@ -237,11 +247,45 @@ func (s *sourcescanner) walkSource(conn redis.Conn, path string, f os.FileInfo, return nil, nil } + var dfData DummyFile + dummyFile := s.dummyFile + d := new(filedata) d.path = 
path[len(GetConfig().Repository):] - d.size = f.Size() - d.modTime = f.ModTime() + if dummyFile { + file, err := os.Open(path) + if err != nil { + log.Errorf(err.Error()) + } + dec := json.NewDecoder(file) + // read open bracket, when there is none, the file is not a json. + // Fallback to normal mode. + _, err = dec.Token() + if err != nil { + goto skipdec + } + err = dec.Decode(&dfData) + skipdec: + if err != nil { + log.Debugf("Failed to read file: %s", err.Error()) + dummyFile = false + d.size = f.Size() + d.modTime = f.ModTime() + goto skip + } + + d.size = dfData.Size + d.modTime, err = time.Parse("2006-01-02 15:04:05.999999999 -0700 MST", dfData.ModTime) + if err != nil { + log.Errorf(err.Error()) + } + } else { + d.size = f.Size() + d.modTime = f.ModTime() + } + +skip: // Get the previous file properties properties, err := redis.Strings(conn.Do("HMGET", fmt.Sprintf("FILE_%s", d.path), "size", "modTime", "sha1", "sha256", "md5")) if err != nil && err != redis.ErrNil { @@ -263,21 +307,27 @@ func (s *sourcescanner) walkSource(conn redis.Conn, path string, f os.FileInfo, (GetConfig().Hashes.MD5 && len(md5) == 0) if rehash || size != d.size || !modTime.Equal(d.modTime) { - h, err := filesystem.HashFile(GetConfig().Repository + d.path) - if err != nil { - log.Warningf("%s: hashing failed: %s", d.path, err.Error()) + if dummyFile { + d.sha1 = dfData.Sha1 + d.sha256 = dfData.Sha256 + d.md5 = dfData.Md5 } else { - d.sha1 = h.Sha1 - d.sha256 = h.Sha256 - d.md5 = h.Md5 - if len(d.sha1) > 0 { - log.Infof("%s: SHA1 %s", d.path, d.sha1) - } - if len(d.sha256) > 0 { - log.Infof("%s: SHA256 %s", d.path, d.sha256) - } - if len(d.md5) > 0 { - log.Infof("%s: MD5 %s", d.path, d.md5) + h, err := filesystem.HashFile(GetConfig().Repository + d.path) + if err != nil { + log.Warningf("%s: hashing failed: %s", d.path, err.Error()) + } else { + d.sha1 = h.Sha1 + d.sha256 = h.Sha256 + d.md5 = h.Md5 + if len(d.sha1) > 0 { + log.Infof("%s: SHA1 %s", d.path, d.sha1) + } + if 
len(d.sha256) > 0 { + log.Infof("%s: SHA256 %s", d.path, d.sha256) + } + if len(d.md5) > 0 { + log.Infof("%s: MD5 %s", d.path, d.md5) + } } } } else { @@ -292,6 +342,7 @@ func (s *sourcescanner) walkSource(conn redis.Conn, path string, f os.FileInfo, // ScanSource starts a scan of the local repository func ScanSource(r *database.Redis, forceRehash bool, stop chan bool) (err error) { s := &sourcescanner{} + s.dummyFile = GetConfig().DummyFiles conn := r.Get() defer conn.Close()