Loading doc/resource-testing.md +1 −1 Original line number Diff line number Diff line Loading @@ -14,7 +14,7 @@ When a resource is first added to rdsys, it is in state "untested". Once it's tested, it's either in state "functional" or "dysfunctional". The same mechanism is being used to request onbasca to test the bandwidth of the resource and provide a ratio. Onbasca does test the resources asyncronosly, the resource and provide a ratio. Onbasca does test the resources asyncronously, instead of testing them at the moment of the request, the response to the request includes the last tested ratio or a 0 if this resource hasn't being tested yet. Rdsys does distribute every resource that has a ratio higher than Loading internal/kraken.go +94 −17 Original line number Diff line number Diff line Loading @@ -10,8 +10,10 @@ import ( "fmt" "io" "log" "math" "net" "os" "reflect" "strconv" "strings" "time" Loading @@ -37,6 +39,11 @@ var ( NotEnoughRunningError = errors.New("There is not enough running bridges") ) type flicker struct { speed int flickered bool } func InitKraken(cfg *Config, shutdown chan bool, ready chan bool, bCtx *BackendContext) { log.Println("Initialising resource kraken.") ticker := time.NewTicker(KrakenTickerInterval) Loading @@ -47,10 +54,9 @@ func InitKraken(cfg *Config, shutdown chan bool, ready chan bool, bCtx *BackendC // Immediately parse bridge descriptor when we're called, and let caller // know when we're done. reloadBridgeDescriptors(cfg, rcol, testFunc, bCtx.metrics) calcTestedResources(bCtx.metrics, rcol) currentRatios := calcTestedResources(bCtx.metrics, nil, rcol) ready <- true bCtx.metrics.updateDistributors(cfg, rcol) for { select { case <-shutdown: Loading @@ -60,7 +66,8 @@ func InitKraken(cfg *Config, shutdown chan bool, ready chan bool, bCtx *BackendC log.Println("Kraken's ticker is ticking.") reloadBridgeDescriptors(cfg, rcol, testFunc, bCtx.metrics) pruneExpiredResources(bCtx.metrics, rcol) calcTestedResources(bCtx.metrics, rcol) // pruneFailingResources(bCtx.metrics, rcol) currentRatios = calcTestedResources(bCtx.metrics, currentRatios, rcol) bCtx.metrics.updateDistributors(cfg, rcol) log.Printf("Backend resources: %s", rcol) } Loading @@ -71,8 +78,9 @@ func InitKraken(cfg *Config, shutdown chan bool, ready chan bool, bCtx *BackendC // resource type and exposes them via Prometheus. The function can tell us // that e.g. among all obfs4 bridges, 0.2 are untested, 0.7 are functional, and // 0.1 are dysfunctional. func calcTestedResources(metrics *Metrics, rcol *core.BackendResources) { func calcTestedResources(metrics *Metrics, currentRatios map[core.Hashkey]flicker, rcol *core.BackendResources) map[core.Hashkey]flicker { newRatios := make(map[core.Hashkey]flicker) // Map our numerical resource states to human-friendly strings. toStr := map[int]string{ core.StateUntested: "untested", Loading @@ -80,6 +88,13 @@ func calcTestedResources(metrics *Metrics, rcol *core.BackendResources) { core.StateDysfunctional: "dysfunctional", } // Map our numerical resource states to human-friendly strings. toStrRatio := map[int]string{ core.StateUntested: "untested", core.StateFunctional: "accepted", core.StateDysfunctional: "rejected", } functionalFractionAcc := 0. ratioFractionAcc := 0. for rName, hashring := range rcol.Collection { Loading @@ -88,23 +103,41 @@ func calcTestedResources(metrics *Metrics, rcol *core.BackendResources) { core.StateFunctional: 0, core.StateDysfunctional: 0, } ratioCount := map[string]int{ "untested": 0, "accepted": 0, "rejected": 0, ratioCount := map[int]int{ core.SpeedUntested: 0, core.SpeedAccepted: 0, core.SpeedRejected: 0, } for _, r := range hashring.GetAll() { rTest := r.TestResult() stateCount[rTest.State] += 1 ratioCount[rTest.Speed] += 1 if rTest.Speed == 1 { newRatios[r.Uid()] = flicker{ speed: rTest.Speed, flickered: false, } } else { newRatios[r.Uid()] = flicker{ speed: rTest.Speed, flickered: false, } } if rTest.Ratio == nil { ratioCount["untested"] += 1 } else if *rTest.Ratio >= rcol.BandwidthRatioThreshold { ratioCount["accepted"] += 1 histRatio := 0.0 maxRatioVal := 3.0 minRatioVal := 0.0 if rTest.Ratio != nil { if *rTest.Ratio <= minRatioVal { histRatio = minRatioVal } else if *rTest.Ratio >= maxRatioVal { histRatio = maxRatioVal } else { ratioCount["rejected"] += 1 histRatio = math.Round(*rTest.Ratio*10) / 10 } } metrics.RatiosSeen.Observe(histRatio) } for state, num := range stateCount { frac := float64(num) / float64(hashring.Len()) Loading @@ -115,14 +148,45 @@ func calcTestedResources(metrics *Metrics, rcol *core.BackendResources) { } for state, num := range ratioCount { frac := float64(num) / float64(hashring.Len()) metrics.BandwidthRatioResources.With(prometheus.Labels{"type": rName, "status": state}).Set(frac) if state == "accepted" { metrics.BandwidthRatioResources.With(prometheus.Labels{"type": rName, "status": toStrRatio[state]}).Set(frac) if state == core.SpeedAccepted { ratioFractionAcc += frac } } // Check for resources that have changed between accepted/rejected if currentRatios != nil { if len(currentRatios) == len(newRatios) && reflect.DeepEqual(currentRatios, newRatios) { metrics.FlickeringBandwidth.With(prometheus.Labels{"fingerprint": "None", "flickered": "None"}) } else { for fingerprint, newFlicker := range newRatios { currentFlicker, found := currentRatios[fingerprint] if found { if currentFlicker.speed != newFlicker.speed { newRatios[fingerprint] = flicker{ speed: newFlicker.speed, flickered: true, } if currentFlicker.flickered && newFlicker.speed == 1 { metrics.FlickeringBandwidth.With(prometheus.Labels{"fingerprint": strconv.FormatUint(uint64(fingerprint), 10), "flickered": "ON"}) } else if currentFlicker.flickered && newFlicker.speed != 1 { metrics.FlickeringBandwidth.With(prometheus.Labels{"fingerprint": strconv.FormatUint(uint64(fingerprint), 10), "flickered": "OFF"}) } } } else { metrics.FlickeringBandwidth.With(prometheus.Labels{"fingerprint": strconv.FormatUint(uint64(fingerprint), 10), "flickered": "NEW"}) } } for fingerprint, _ := range currentRatios { _, found := newRatios[fingerprint] if !found { metrics.FlickeringBandwidth.With(prometheus.Labels{"fingerprint": strconv.FormatUint(uint64(fingerprint), 10), "flickered": "GONE"}) } } } } } // Distribute only functional resources if the fraction is high enough // Distribute only functional resources if the fraction is high enough. // The fraction might be low after a restart as many resources will be // untested or if there is an issue with bridgestrap. functionalFraction := functionalFractionAcc / float64(len(rcol.Collection)) Loading @@ -142,6 +206,8 @@ func calcTestedResources(metrics *Metrics, rcol *core.BackendResources) { } else { metrics.IgnoringBandwidthRatio.Set(1) } return newRatios } func pruneExpiredResources(metrics *Metrics, rcol *core.BackendResources) { Loading @@ -156,6 +222,17 @@ func pruneExpiredResources(metrics *Metrics, rcol *core.BackendResources) { } } /*func pruneFailingResources(metrics *Metrics, rcol *core.BackendResources) { for rName, hashring := range rcol.Collection { origLen := hashring.Len() prunedResources := rcol.PruneFailing(rName) if len(prunedResources) > 0 { log.Printf("Pruned %d out of %d resources from %s hashring.", len(prunedResources), origLen, rName) } metrics.Resources.With(prometheus.Labels{"type": rName}).Set(float64(hashring.Len())) } }*/ // reloadBridgeDescriptors reloads bridge descriptors from the given // cached-extrainfo file and its corresponding cached-extrainfo.new. func reloadBridgeDescriptors(cfg *Config, rcol *core.BackendResources, testFunc resources.TestFunc, metrics *Metrics) { Loading internal/kraken_test.go +2 −2 Original line number Diff line number Diff line Loading @@ -154,7 +154,7 @@ func TestOnlyFunctional(t *testing.T) { } reloadBridgeDescriptors(&testCfg, rcol, nil, metrics) calcTestedResources(metrics, rcol) currentRatios := calcTestedResources(metrics, nil, rcol) if rcol.OnlyFunctional { t.Errorf("OnlyFunctional flag enabled when most resources are untested") } Loading Loading @@ -189,7 +189,7 @@ func TestOnlyFunctional(t *testing.T) { r.TestResult().State = core.StateFunctional } } calcTestedResources(metrics, rcol) calcTestedResources(metrics, currentRatios, rcol) if !rcol.OnlyFunctional { t.Errorf("OnlyFunctional flag disabled when most resources are functional") } Loading internal/metrics.go +20 −1 Original line number Diff line number Diff line Loading @@ -28,6 +28,8 @@ type Metrics struct { BandwidthRatioResources *prometheus.GaugeVec IgnoringBandwidthRatio prometheus.Gauge IgnoringBridgeDescriptors prometheus.Gauge FlickeringBandwidth *prometheus.GaugeVec RatiosSeen prometheus.Histogram Resources *prometheus.GaugeVec DistributorResources *prometheus.GaugeVec Requests *prometheus.CounterVec Loading @@ -51,7 +53,7 @@ func InitMetrics() *Metrics { prometheus.GaugeOpts{ Namespace: PrometheusNamespace, Name: "distributing_non_functional_resources", Help: "If rdsys is distribution non functional bridges", Help: "If rdsys is distributing non functional bridges", }, ) Loading @@ -72,6 +74,23 @@ func InitMetrics() *Metrics { }, ) metrics.FlickeringBandwidth = promauto.NewGaugeVec( prometheus.GaugeOpts{ Namespace: PrometheusNamespace, Name: "flickering_bandwidth", Help: "The number of resources that have changed from acceptable to rejected bandwidths", }, []string{"fingerprint", "flickered"}, ) metrics.RatiosSeen = promauto.NewHistogram( prometheus.HistogramOpts{ Namespace: PrometheusNamespace, Name: "ratio_seen", Help: "The different bandwidth ratios that were observed", }, ) metrics.IgnoringBridgeDescriptors = promauto.NewGauge( prometheus.GaugeOpts{ Namespace: PrometheusNamespace, Loading pkg/core/domain.go +12 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,17 @@ const ( StateDysfunctional ) const ( // The following constants act as a crude representation of the bandwidth speed // that a resource can have. This is meant as a way to check whether or not the ratio // meets the bandwidth threshold for the given resource without requiring the context. // Before an onbasca test, a resource's speed is untested. Afterwards, it's either // sufficient or insufficient. SpeedUntested = iota SpeedAccepted SpeedRejected ) // Resource specifies the resources that rdsys hands out to users. This could // be a vanilla Tor bridge, and obfs4 bridge, a Snowflake proxy, and even Tor // Browser links. Your imagination is the limit. Loading Loading @@ -56,6 +67,7 @@ type Resource interface { // https://gitlab.torproject.org/tpo/network-health/onbasca/ type ResourceTest struct { State int Speed int Ratio *float64 LastTested time.Time Error string Loading Loading
doc/resource-testing.md +1 −1 Original line number Diff line number Diff line Loading @@ -14,7 +14,7 @@ When a resource is first added to rdsys, it is in state "untested". Once it's tested, it's either in state "functional" or "dysfunctional". The same mechanism is being used to request onbasca to test the bandwidth of the resource and provide a ratio. Onbasca does test the resources asyncronosly, the resource and provide a ratio. Onbasca does test the resources asyncronously, instead of testing them at the moment of the request, the response to the request includes the last tested ratio or a 0 if this resource hasn't being tested yet. Rdsys does distribute every resource that has a ratio higher than Loading
internal/kraken.go +94 −17 Original line number Diff line number Diff line Loading @@ -10,8 +10,10 @@ import ( "fmt" "io" "log" "math" "net" "os" "reflect" "strconv" "strings" "time" Loading @@ -37,6 +39,11 @@ var ( NotEnoughRunningError = errors.New("There is not enough running bridges") ) type flicker struct { speed int flickered bool } func InitKraken(cfg *Config, shutdown chan bool, ready chan bool, bCtx *BackendContext) { log.Println("Initialising resource kraken.") ticker := time.NewTicker(KrakenTickerInterval) Loading @@ -47,10 +54,9 @@ func InitKraken(cfg *Config, shutdown chan bool, ready chan bool, bCtx *BackendC // Immediately parse bridge descriptor when we're called, and let caller // know when we're done. reloadBridgeDescriptors(cfg, rcol, testFunc, bCtx.metrics) calcTestedResources(bCtx.metrics, rcol) currentRatios := calcTestedResources(bCtx.metrics, nil, rcol) ready <- true bCtx.metrics.updateDistributors(cfg, rcol) for { select { case <-shutdown: Loading @@ -60,7 +66,8 @@ func InitKraken(cfg *Config, shutdown chan bool, ready chan bool, bCtx *BackendC log.Println("Kraken's ticker is ticking.") reloadBridgeDescriptors(cfg, rcol, testFunc, bCtx.metrics) pruneExpiredResources(bCtx.metrics, rcol) calcTestedResources(bCtx.metrics, rcol) // pruneFailingResources(bCtx.metrics, rcol) currentRatios = calcTestedResources(bCtx.metrics, currentRatios, rcol) bCtx.metrics.updateDistributors(cfg, rcol) log.Printf("Backend resources: %s", rcol) } Loading @@ -71,8 +78,9 @@ func InitKraken(cfg *Config, shutdown chan bool, ready chan bool, bCtx *BackendC // resource type and exposes them via Prometheus. The function can tell us // that e.g. among all obfs4 bridges, 0.2 are untested, 0.7 are functional, and // 0.1 are dysfunctional. func calcTestedResources(metrics *Metrics, rcol *core.BackendResources) { func calcTestedResources(metrics *Metrics, currentRatios map[core.Hashkey]flicker, rcol *core.BackendResources) map[core.Hashkey]flicker { newRatios := make(map[core.Hashkey]flicker) // Map our numerical resource states to human-friendly strings. toStr := map[int]string{ core.StateUntested: "untested", Loading @@ -80,6 +88,13 @@ func calcTestedResources(metrics *Metrics, rcol *core.BackendResources) { core.StateDysfunctional: "dysfunctional", } // Map our numerical resource states to human-friendly strings. toStrRatio := map[int]string{ core.StateUntested: "untested", core.StateFunctional: "accepted", core.StateDysfunctional: "rejected", } functionalFractionAcc := 0. ratioFractionAcc := 0. for rName, hashring := range rcol.Collection { Loading @@ -88,23 +103,41 @@ func calcTestedResources(metrics *Metrics, rcol *core.BackendResources) { core.StateFunctional: 0, core.StateDysfunctional: 0, } ratioCount := map[string]int{ "untested": 0, "accepted": 0, "rejected": 0, ratioCount := map[int]int{ core.SpeedUntested: 0, core.SpeedAccepted: 0, core.SpeedRejected: 0, } for _, r := range hashring.GetAll() { rTest := r.TestResult() stateCount[rTest.State] += 1 ratioCount[rTest.Speed] += 1 if rTest.Speed == 1 { newRatios[r.Uid()] = flicker{ speed: rTest.Speed, flickered: false, } } else { newRatios[r.Uid()] = flicker{ speed: rTest.Speed, flickered: false, } } if rTest.Ratio == nil { ratioCount["untested"] += 1 } else if *rTest.Ratio >= rcol.BandwidthRatioThreshold { ratioCount["accepted"] += 1 histRatio := 0.0 maxRatioVal := 3.0 minRatioVal := 0.0 if rTest.Ratio != nil { if *rTest.Ratio <= minRatioVal { histRatio = minRatioVal } else if *rTest.Ratio >= maxRatioVal { histRatio = maxRatioVal } else { ratioCount["rejected"] += 1 histRatio = math.Round(*rTest.Ratio*10) / 10 } } metrics.RatiosSeen.Observe(histRatio) } for state, num := range stateCount { frac := float64(num) / float64(hashring.Len()) Loading @@ -115,14 +148,45 @@ func calcTestedResources(metrics *Metrics, rcol *core.BackendResources) { } for state, num := range ratioCount { frac := float64(num) / float64(hashring.Len()) metrics.BandwidthRatioResources.With(prometheus.Labels{"type": rName, "status": state}).Set(frac) if state == "accepted" { metrics.BandwidthRatioResources.With(prometheus.Labels{"type": rName, "status": toStrRatio[state]}).Set(frac) if state == core.SpeedAccepted { ratioFractionAcc += frac } } // Check for resources that have changed between accepted/rejected if currentRatios != nil { if len(currentRatios) == len(newRatios) && reflect.DeepEqual(currentRatios, newRatios) { metrics.FlickeringBandwidth.With(prometheus.Labels{"fingerprint": "None", "flickered": "None"}) } else { for fingerprint, newFlicker := range newRatios { currentFlicker, found := currentRatios[fingerprint] if found { if currentFlicker.speed != newFlicker.speed { newRatios[fingerprint] = flicker{ speed: newFlicker.speed, flickered: true, } if currentFlicker.flickered && newFlicker.speed == 1 { metrics.FlickeringBandwidth.With(prometheus.Labels{"fingerprint": strconv.FormatUint(uint64(fingerprint), 10), "flickered": "ON"}) } else if currentFlicker.flickered && newFlicker.speed != 1 { metrics.FlickeringBandwidth.With(prometheus.Labels{"fingerprint": strconv.FormatUint(uint64(fingerprint), 10), "flickered": "OFF"}) } } } else { metrics.FlickeringBandwidth.With(prometheus.Labels{"fingerprint": strconv.FormatUint(uint64(fingerprint), 10), "flickered": "NEW"}) } } for fingerprint, _ := range currentRatios { _, found := newRatios[fingerprint] if !found { metrics.FlickeringBandwidth.With(prometheus.Labels{"fingerprint": strconv.FormatUint(uint64(fingerprint), 10), "flickered": "GONE"}) } } } } } // Distribute only functional resources if the fraction is high enough // Distribute only functional resources if the fraction is high enough. // The fraction might be low after a restart as many resources will be // untested or if there is an issue with bridgestrap. functionalFraction := functionalFractionAcc / float64(len(rcol.Collection)) Loading @@ -142,6 +206,8 @@ func calcTestedResources(metrics *Metrics, rcol *core.BackendResources) { } else { metrics.IgnoringBandwidthRatio.Set(1) } return newRatios } func pruneExpiredResources(metrics *Metrics, rcol *core.BackendResources) { Loading @@ -156,6 +222,17 @@ func pruneExpiredResources(metrics *Metrics, rcol *core.BackendResources) { } } /*func pruneFailingResources(metrics *Metrics, rcol *core.BackendResources) { for rName, hashring := range rcol.Collection { origLen := hashring.Len() prunedResources := rcol.PruneFailing(rName) if len(prunedResources) > 0 { log.Printf("Pruned %d out of %d resources from %s hashring.", len(prunedResources), origLen, rName) } metrics.Resources.With(prometheus.Labels{"type": rName}).Set(float64(hashring.Len())) } }*/ // reloadBridgeDescriptors reloads bridge descriptors from the given // cached-extrainfo file and its corresponding cached-extrainfo.new. func reloadBridgeDescriptors(cfg *Config, rcol *core.BackendResources, testFunc resources.TestFunc, metrics *Metrics) { Loading
internal/kraken_test.go +2 −2 Original line number Diff line number Diff line Loading @@ -154,7 +154,7 @@ func TestOnlyFunctional(t *testing.T) { } reloadBridgeDescriptors(&testCfg, rcol, nil, metrics) calcTestedResources(metrics, rcol) currentRatios := calcTestedResources(metrics, nil, rcol) if rcol.OnlyFunctional { t.Errorf("OnlyFunctional flag enabled when most resources are untested") } Loading Loading @@ -189,7 +189,7 @@ func TestOnlyFunctional(t *testing.T) { r.TestResult().State = core.StateFunctional } } calcTestedResources(metrics, rcol) calcTestedResources(metrics, currentRatios, rcol) if !rcol.OnlyFunctional { t.Errorf("OnlyFunctional flag disabled when most resources are functional") } Loading
internal/metrics.go +20 −1 Original line number Diff line number Diff line Loading @@ -28,6 +28,8 @@ type Metrics struct { BandwidthRatioResources *prometheus.GaugeVec IgnoringBandwidthRatio prometheus.Gauge IgnoringBridgeDescriptors prometheus.Gauge FlickeringBandwidth *prometheus.GaugeVec RatiosSeen prometheus.Histogram Resources *prometheus.GaugeVec DistributorResources *prometheus.GaugeVec Requests *prometheus.CounterVec Loading @@ -51,7 +53,7 @@ func InitMetrics() *Metrics { prometheus.GaugeOpts{ Namespace: PrometheusNamespace, Name: "distributing_non_functional_resources", Help: "If rdsys is distribution non functional bridges", Help: "If rdsys is distributing non functional bridges", }, ) Loading @@ -72,6 +74,23 @@ func InitMetrics() *Metrics { }, ) metrics.FlickeringBandwidth = promauto.NewGaugeVec( prometheus.GaugeOpts{ Namespace: PrometheusNamespace, Name: "flickering_bandwidth", Help: "The number of resources that have changed from acceptable to rejected bandwidths", }, []string{"fingerprint", "flickered"}, ) metrics.RatiosSeen = promauto.NewHistogram( prometheus.HistogramOpts{ Namespace: PrometheusNamespace, Name: "ratio_seen", Help: "The different bandwidth ratios that were observed", }, ) metrics.IgnoringBridgeDescriptors = promauto.NewGauge( prometheus.GaugeOpts{ Namespace: PrometheusNamespace, Loading
pkg/core/domain.go +12 −0 Original line number Diff line number Diff line Loading @@ -20,6 +20,17 @@ const ( StateDysfunctional ) const ( // The following constants act as a crude representation of the bandwidth speed // that a resource can have. This is meant as a way to check whether or not the ratio // meets the bandwidth threshold for the given resource without requiring the context. // Before an onbasca test, a resource's speed is untested. Afterwards, it's either // sufficient or insufficient. SpeedUntested = iota SpeedAccepted SpeedRejected ) // Resource specifies the resources that rdsys hands out to users. This could // be a vanilla Tor bridge, and obfs4 bridge, a Snowflake proxy, and even Tor // Browser links. Your imagination is the limit. Loading Loading @@ -56,6 +67,7 @@ type Resource interface { // https://gitlab.torproject.org/tpo/network-health/onbasca/ type ResourceTest struct { State int Speed int Ratio *float64 LastTested time.Time Error string Loading