mirror of
https://github.com/paleotronic/diskm8.git
synced 2024-12-22 09:30:12 +00:00
411 lines
8.1 KiB
Go
411 lines
8.1 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"os/signal"
|
|
"sync"
|
|
)
|
|
|
|
const EMPTYSECTOR = "5341e6b2646979a70e57653007a1f310169421ec9bdd9f1a5648f75ade005af1"
|
|
|
|
func GetAllDiskSectors(pattern string, pathfilter []string) map[string]DiskSectors {
|
|
|
|
cache := make(map[string]DiskSectors)
|
|
|
|
exists, matches := existsPattern(*baseName, pathfilter, pattern)
|
|
if !exists {
|
|
return cache
|
|
}
|
|
|
|
workchan := make(chan string, 100)
|
|
var s sync.Mutex
|
|
var wg sync.WaitGroup
|
|
|
|
for i := 0; i < ingestWorkers; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
for m := range workchan {
|
|
item := &Disk{}
|
|
if err := item.ReadFromFile(m); err == nil {
|
|
|
|
//
|
|
chunk := append(item.ActiveSectors, item.InactiveSectors...)
|
|
tmp := make(DiskSectors, 0)
|
|
for _, v := range chunk {
|
|
if v.SHA256 != EMPTYSECTOR {
|
|
tmp = append(tmp, v)
|
|
} else {
|
|
fmt.Printf("%s: throw away zero sector T%d,S%d\n", item.Filename, v.Track, v.Sector)
|
|
}
|
|
}
|
|
|
|
// Load cache
|
|
s.Lock()
|
|
cache[item.FullPath] = tmp
|
|
s.Unlock()
|
|
|
|
}
|
|
}
|
|
wg.Done()
|
|
}()
|
|
}
|
|
|
|
var lastPc int = -1
|
|
for i, m := range matches {
|
|
|
|
workchan <- m
|
|
|
|
pc := int(100 * float64(i) / float64(len(matches)))
|
|
|
|
if pc != lastPc {
|
|
fmt.Print("\r")
|
|
os.Stderr.WriteString(fmt.Sprintf("Caching disk sector data... %d%% ", pc))
|
|
}
|
|
|
|
lastPc = pc
|
|
}
|
|
close(workchan)
|
|
|
|
wg.Wait()
|
|
|
|
return cache
|
|
}
|
|
|
|
func GetActiveDiskSectors(pattern string, pathfilter []string) map[string]DiskSectors {
|
|
|
|
cache := make(map[string]DiskSectors)
|
|
|
|
exists, matches := existsPattern(*baseName, pathfilter, pattern)
|
|
if !exists {
|
|
return cache
|
|
}
|
|
|
|
workchan := make(chan string, 100)
|
|
var s sync.Mutex
|
|
var wg sync.WaitGroup
|
|
|
|
for i := 0; i < ingestWorkers; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
for m := range workchan {
|
|
item := &Disk{}
|
|
if err := item.ReadFromFile(m); err == nil {
|
|
|
|
// Load cache
|
|
s.Lock()
|
|
cache[item.FullPath] = item.ActiveSectors
|
|
s.Unlock()
|
|
|
|
}
|
|
}
|
|
wg.Done()
|
|
}()
|
|
}
|
|
|
|
var lastPc int = -1
|
|
for i, m := range matches {
|
|
|
|
workchan <- m
|
|
|
|
pc := int(100 * float64(i) / float64(len(matches)))
|
|
|
|
if pc != lastPc {
|
|
fmt.Print("\r")
|
|
os.Stderr.WriteString(fmt.Sprintf("Caching disk sector data... %d%% ", pc))
|
|
}
|
|
|
|
lastPc = pc
|
|
}
|
|
close(workchan)
|
|
|
|
wg.Wait()
|
|
|
|
return cache
|
|
}
|
|
|
|
func GetSectorMap(d DiskSectors) map[string]*DiskSector {
|
|
|
|
out := make(map[string]*DiskSector)
|
|
for _, v := range d {
|
|
out[fmt.Sprintf("T%d,S%d", v.Track, v.Sector)] = v
|
|
}
|
|
return out
|
|
|
|
}
|
|
|
|
type SectorOverlapRecord struct {
|
|
same map[string]map[*DiskSector]*DiskSector
|
|
percent map[string]float64
|
|
missing map[string][]*DiskSector
|
|
extras map[string][]*DiskSector
|
|
}
|
|
|
|
func (f *SectorOverlapRecord) Remove(key string) {
|
|
delete(f.same, key)
|
|
delete(f.percent, key)
|
|
delete(f.missing, key)
|
|
delete(f.extras, key)
|
|
}
|
|
|
|
func (f *SectorOverlapRecord) IsSubsetOf(filename string) bool {
|
|
|
|
// f is a subset if:
|
|
// missing == 0
|
|
// extra > 0
|
|
|
|
if _, ok := f.same[filename]; !ok {
|
|
return false
|
|
}
|
|
|
|
return len(f.extras[filename]) > 0 && len(f.missing[filename]) == 0
|
|
|
|
}
|
|
|
|
func (f *SectorOverlapRecord) IsSupersetOf(filename string) bool {
|
|
|
|
// f is a superset if:
|
|
// missing > 0
|
|
// extra == 0
|
|
|
|
if _, ok := f.same[filename]; !ok {
|
|
return false
|
|
}
|
|
|
|
return len(f.extras[filename]) == 0 && len(f.missing[filename]) > 0
|
|
|
|
}
|
|
|
|
func CompareSectors(d, b DiskSectors, r *SectorOverlapRecord, key string) float64 {
|
|
|
|
var sameSectors float64
|
|
var missingSectors float64
|
|
var extraSectors float64
|
|
|
|
var dmap = GetSectorMap(d)
|
|
var bmap = GetSectorMap(b)
|
|
|
|
for fileCk, info := range dmap {
|
|
|
|
binfo, bEx := bmap[fileCk]
|
|
|
|
if bEx && info.SHA256 == binfo.SHA256 {
|
|
sameSectors += 1
|
|
if r.same[key] == nil {
|
|
r.same[key] = make(map[*DiskSector]*DiskSector)
|
|
}
|
|
|
|
r.same[key][binfo] = info
|
|
} else {
|
|
missingSectors += 1
|
|
if r.missing[key] == nil {
|
|
r.missing[key] = make([]*DiskSector, 0)
|
|
}
|
|
r.missing[key] = append(r.missing[key], info)
|
|
}
|
|
|
|
}
|
|
|
|
for fileCk, info := range bmap {
|
|
|
|
_, dEx := dmap[fileCk]
|
|
|
|
if !dEx {
|
|
extraSectors += 1
|
|
// file match
|
|
if r.extras[key] == nil {
|
|
r.extras[key] = make([]*DiskSector, 0)
|
|
}
|
|
//fmt.Printf("*** %s: %s -> %s\n", b.Filename, binfo.Filename, info.Filename)
|
|
r.extras[key] = append(r.extras[key], info)
|
|
}
|
|
|
|
}
|
|
|
|
if (sameSectors + extraSectors + missingSectors) == 0 {
|
|
return 0
|
|
}
|
|
|
|
// return sameSectors / dTotal, sameSectors / bTotal, diffSectors / dTotal, diffSectors / btotal
|
|
return sameSectors / (sameSectors + extraSectors + missingSectors)
|
|
|
|
}
|
|
|
|
// Actual fuzzy file match report
|
|
func CollectSectorOverlapsAboveThreshold(t float64, pathfilter []string, ff func(pattern string, pathfilter []string) map[string]DiskSectors) map[string]*SectorOverlapRecord {
|
|
|
|
filerecords := ff("*_*_*_*.fgp", pathfilter)
|
|
|
|
results := make(map[string]*SectorOverlapRecord)
|
|
|
|
workchan := make(chan string, 100)
|
|
var wg sync.WaitGroup
|
|
var s sync.Mutex
|
|
|
|
c := make(chan os.Signal, 1)
|
|
signal.Notify(c, os.Interrupt)
|
|
|
|
for i := 0; i < processWorkers; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
for m := range workchan {
|
|
|
|
v := &SectorOverlapRecord{
|
|
same: make(map[string]map[*DiskSector]*DiskSector),
|
|
percent: make(map[string]float64),
|
|
missing: make(map[string][]*DiskSector),
|
|
extras: make(map[string][]*DiskSector),
|
|
}
|
|
|
|
d := filerecords[m]
|
|
|
|
for k, b := range filerecords {
|
|
if k == m {
|
|
continue // dont compare ourselves
|
|
}
|
|
// ok good to compare -- only keep if we need our threshold
|
|
|
|
if closeness := CompareSectors(d, b, v, k); closeness < t {
|
|
v.Remove(k)
|
|
} else {
|
|
v.percent[k] = closeness
|
|
}
|
|
}
|
|
|
|
// since we delete < threshold, only add if we have any result
|
|
if len(v.percent) > 0 {
|
|
//os.Stderr.WriteString("\r\nAdded file: " + m + "\r\n\r\n")
|
|
s.Lock()
|
|
results[m] = v
|
|
s.Unlock()
|
|
}
|
|
|
|
}
|
|
wg.Done()
|
|
}()
|
|
}
|
|
|
|
// feed data in
|
|
var lastPc int = -1
|
|
var i int
|
|
for k, _ := range filerecords {
|
|
|
|
if len(c) > 0 {
|
|
sig := <-c
|
|
if sig == os.Interrupt {
|
|
close(c)
|
|
os.Stderr.WriteString("\r\nInterrupted. Waiting for workers to stop.\r\n\r\n")
|
|
break
|
|
}
|
|
}
|
|
|
|
workchan <- k
|
|
|
|
pc := int(100 * float64(i) / float64(len(filerecords)))
|
|
|
|
if pc != lastPc {
|
|
fmt.Print("\r")
|
|
os.Stderr.WriteString(fmt.Sprintf("Processing sectors data... %d%% ", pc))
|
|
}
|
|
|
|
lastPc = pc
|
|
i++
|
|
}
|
|
|
|
close(workchan)
|
|
wg.Wait()
|
|
|
|
return results
|
|
|
|
}
|
|
|
|
// Actual fuzzy file match report
|
|
func CollectSectorSubsets(pathfilter []string, ff func(pattern string, pathfilter []string) map[string]DiskSectors) map[string]*SectorOverlapRecord {
|
|
|
|
filerecords := ff("*_*_*_*.fgp", pathfilter)
|
|
|
|
results := make(map[string]*SectorOverlapRecord)
|
|
|
|
workchan := make(chan string, 100)
|
|
var wg sync.WaitGroup
|
|
var s sync.Mutex
|
|
|
|
c := make(chan os.Signal, 1)
|
|
signal.Notify(c, os.Interrupt)
|
|
|
|
for i := 0; i < processWorkers; i++ {
|
|
wg.Add(1)
|
|
go func() {
|
|
for m := range workchan {
|
|
|
|
v := &SectorOverlapRecord{
|
|
same: make(map[string]map[*DiskSector]*DiskSector),
|
|
percent: make(map[string]float64),
|
|
missing: make(map[string][]*DiskSector),
|
|
extras: make(map[string][]*DiskSector),
|
|
}
|
|
|
|
d := filerecords[m]
|
|
|
|
for k, b := range filerecords {
|
|
if k == m {
|
|
continue // dont compare ourselves
|
|
}
|
|
// ok good to compare -- only keep if we need our threshold
|
|
|
|
closeness := CompareSectors(d, b, v, k)
|
|
|
|
if !v.IsSubsetOf(k) {
|
|
v.Remove(k)
|
|
} else {
|
|
v.percent[k] = closeness
|
|
}
|
|
}
|
|
|
|
// since we delete < threshold, only add if we have any result
|
|
if len(v.percent) > 0 {
|
|
//os.Stderr.WriteString("\r\nAdded file: " + m + "\r\n\r\n")
|
|
s.Lock()
|
|
results[m] = v
|
|
s.Unlock()
|
|
}
|
|
|
|
}
|
|
wg.Done()
|
|
}()
|
|
}
|
|
|
|
// feed data in
|
|
var lastPc int = -1
|
|
var i int
|
|
for k, _ := range filerecords {
|
|
|
|
if len(c) > 0 {
|
|
sig := <-c
|
|
if sig == os.Interrupt {
|
|
close(c)
|
|
os.Stderr.WriteString("\r\nInterrupted. Waiting for workers to stop.\r\n\r\n")
|
|
break
|
|
}
|
|
}
|
|
|
|
workchan <- k
|
|
|
|
pc := int(100 * float64(i) / float64(len(filerecords)))
|
|
|
|
if pc != lastPc {
|
|
fmt.Print("\r")
|
|
os.Stderr.WriteString(fmt.Sprintf("Processing sectors data... %d%% ", pc))
|
|
}
|
|
|
|
lastPc = pc
|
|
i++
|
|
}
|
|
|
|
close(workchan)
|
|
wg.Wait()
|
|
|
|
return results
|
|
|
|
}
|