GitLab is used only for code review, issue tracking and project management. Canonical locations for source code are still https://gitweb.torproject.org/ https://git.torproject.org/ and git-rw.torproject.org.

Implement Prometheus-based metrics.

The way Prometheus works is that a central instance scrapes metrics from
various endpoints, bridgestrap being one of them.  We expose a new HTTP
handler for /metrics and let our Prometheus library do the rest.

This patch fixes #4
parent cf1bce74
......@@ -47,6 +47,24 @@ func bridgeLineToAddrPort(bridgeLine string) (string, error) {
}
}
// FracFunctional returns the fraction of bridges currently in the cache that
// are functional.
func (tc *TestCache) FracFunctional() float64 {
if len((*tc).Entries) == 0 {
return 0
}
numFunctional := 0
for _, entry := range (*tc).Entries {
if entry.Error == "" {
numFunctional++
}
}
return float64(numFunctional) / float64(len((*tc).Entries))
}
// WriteToDisk writes our test result cache to disk, allowing it to persist
// across program restarts.
func (tc *TestCache) WriteToDisk(cacheFile string) error {
......@@ -134,4 +152,6 @@ func (tc *TestCache) AddEntry(bridgeLine string, result error, lastTested time.T
tc.l.Lock()
(*tc).Entries[addrPort] = &CacheEntry{errorStr, lastTested}
tc.l.Unlock()
metrics.FracFunctional.Set((*tc).FracFunctional())
}
......@@ -11,6 +11,10 @@ import (
"time"
)
func init() {
InitMetrics()
}
func NewCache() *TestCache {
return &TestCache{
Entries: make(map[string]*CacheEntry),
......@@ -55,6 +59,21 @@ func TestCacheFunctions(t *testing.T) {
}
}
func TestCacheFracFunctional(t *testing.T) {
cache := NewCache()
cache.AddEntry("1.1.1.1:1", nil, time.Now().UTC())
cache.AddEntry("2.2.2.2:2", nil, time.Now().UTC())
cache.AddEntry("3.3.3.3:3", nil, time.Now().UTC())
cache.AddEntry("4.4.4.4:4", errors.New("error"), time.Now().UTC())
expected := 0.75
if cache.FracFunctional() != expected {
t.Errorf("Expected fraction %.2f but got %.2f.", expected, cache.FracFunctional())
}
}
func TestCacheExpiration(t *testing.T) {
cache := NewCache()
......
......@@ -3,9 +3,9 @@ module gitlab.torproject.org/tpo/anti-censorship/bridgestrap
go 1.13
require (
git.torproject.org/pluggable-transports/snowflake.git v0.0.0-20191213231743-37aaaffa1521
github.com/gorilla/mux v1.7.3
git.torproject.org/pluggable-transports/snowflake.git v0.0.0-20201120061516-ece43cbfcfc3
github.com/gorilla/mux v1.8.0
github.com/prometheus/client_golang v1.8.0
github.com/yawning/bulb v0.0.0-20170405033506-85d80d893c3d
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa // indirect
golang.org/x/time v0.0.0-20191024005414-555d28b269f0
golang.org/x/time v0.0.0-20200630173020-3af7569d3a1e
)
This diff is collapsed.
......@@ -100,12 +100,14 @@ func testBridgeLines(req *TestRequest) *TestResult {
for _, bridgeLine := range req.BridgeLines {
if entry := cache.IsCached(bridgeLine); entry != nil {
numCached++
metrics.CacheHits.Inc()
result.Bridges[bridgeLine] = &BridgeTest{
Functional: entry.Error == "",
LastTested: entry.Time,
Error: entry.Error,
}
} else {
metrics.CacheMisses.Inc()
remainingBridgeLines = append(remainingBridgeLines, bridgeLine)
}
}
......@@ -125,6 +127,11 @@ func testBridgeLines(req *TestRequest) *TestResult {
// Cache partial test results and add them to our existing result object.
for bridgeLine, bridgeTest := range partialResult.Bridges {
cache.AddEntry(bridgeLine, errors.New(bridgeTest.Error), bridgeTest.LastTested)
if bridgeTest.Functional {
metrics.NumFunctionalBridges.Inc()
} else {
metrics.NumDysfunctionalBridges.Inc()
}
result.Bridges[bridgeLine] = bridgeTest
}
} else {
......@@ -147,11 +154,14 @@ func testBridgeLines(req *TestRequest) *TestResult {
numDysfunctional,
float64(numDysfunctional)/float64(len(result.Bridges))*100)
metrics.CacheSize.Set(float64(len(cache.Entries)))
return result
}
func BridgeState(w http.ResponseWriter, r *http.Request) {
metrics.ApiNumRequests.Inc()
b, err := ioutil.ReadAll(r.Body)
defer r.Body.Close()
if err != nil {
......@@ -173,6 +183,7 @@ func BridgeState(w http.ResponseWriter, r *http.Request) {
return
}
metrics.ApiNumValidRequests.Inc()
if len(req.BridgeLines) > MaxBridgesPerReq {
log.Printf("Got %d bridges in request but we only allow <= %d.", len(req.BridgeLines), MaxBridgesPerReq)
http.Error(w, fmt.Sprintf("maximum of %d bridge lines allowed", MaxBridgesPerReq), http.StatusBadRequest)
......@@ -193,6 +204,7 @@ func BridgeState(w http.ResponseWriter, r *http.Request) {
func BridgeStateWeb(w http.ResponseWriter, r *http.Request) {
metrics.WebNumRequests.Inc()
r.ParseForm()
// Rate-limit Web requests to prevent someone from abusing this service
// as a port scanner.
......@@ -206,6 +218,7 @@ func BridgeStateWeb(w http.ResponseWriter, r *http.Request) {
return
}
metrics.WebNumValidRequests.Inc()
result := testBridgeLines(&TestRequest{BridgeLines: []string{bridgeLine}})
bridgeResult, exists := result.Bridges[bridgeLine]
if !exists {
......
......@@ -14,12 +14,13 @@ import (
"git.torproject.org/pluggable-transports/snowflake.git/common/safelog"
"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
const (
// BridgestrapVersion specifies bridgestrap's version. The version number
// is based on semantic versioning: https://semver.org
BridgestrapVersion = "0.2.0"
BridgestrapVersion = "0.3.0"
)
type Route struct {
......@@ -86,6 +87,7 @@ func NewRouter() *mux.Router {
Name(route.Name).
Handler(handler)
}
router.Path("/metrics").Handler(promhttp.Handler())
return router
}
......@@ -194,6 +196,9 @@ func main() {
return
}
log.Printf("Initialising Prometheus metrics.")
InitMetrics()
var srv http.Server
srv.Addr = addr
srv.Handler = NewRouter()
......
package main
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
const (
PrometheusNamespace = "bridgestrap"
)
type Metrics struct {
OrconnLaunched prometheus.Counter
CacheHits prometheus.Counter
CacheMisses prometheus.Counter
CacheSize prometheus.Gauge
PendingReqs prometheus.Gauge
FracFunctional prometheus.Gauge
ApiNumRequests prometheus.Counter
ApiNumValidRequests prometheus.Counter
WebNumRequests prometheus.Counter
WebNumValidRequests prometheus.Counter
NumFunctionalBridges prometheus.Counter
NumDysfunctionalBridges prometheus.Counter
TorTestTime prometheus.Histogram
}
var metrics *Metrics
// InitMetrics initialises our Prometheus metrics.
func InitMetrics() {
metrics = &Metrics{}
metrics.OrconnLaunched = promauto.NewCounter(prometheus.CounterOpts{
Namespace: PrometheusNamespace,
Name: "tor_events_orconn_launched",
Help: "The number of ORCONN launch events",
})
metrics.PendingReqs = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: PrometheusNamespace,
Name: "pending_requests",
Help: "The number of pending requests",
})
metrics.FracFunctional = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: PrometheusNamespace,
Name: "fraction_functional",
Help: "The fraction of functional bridges currently in the cache",
})
metrics.CacheHits = promauto.NewCounter(prometheus.CounterOpts{
Namespace: PrometheusNamespace,
Name: "cache_hits",
Help: "The number of requests that hit the cache",
})
metrics.CacheMisses = promauto.NewCounter(prometheus.CounterOpts{
Namespace: PrometheusNamespace,
Name: "cache_misses",
Help: "The number of requests that missed the cache",
})
metrics.CacheSize = promauto.NewGauge(prometheus.GaugeOpts{
Namespace: PrometheusNamespace,
Name: "cache_size",
Help: "The number of cached elements",
})
metrics.ApiNumRequests = promauto.NewCounter(prometheus.CounterOpts{
Namespace: PrometheusNamespace,
Name: "api_num_requests",
Help: "The number of API requests",
})
metrics.ApiNumValidRequests = promauto.NewCounter(prometheus.CounterOpts{
Namespace: PrometheusNamespace,
Name: "api_num_validrequests",
Help: "The number of valid API requests",
})
metrics.WebNumRequests = promauto.NewCounter(prometheus.CounterOpts{
Namespace: PrometheusNamespace,
Name: "web_num_requests",
Help: "The number of Web requests",
})
metrics.WebNumValidRequests = promauto.NewCounter(prometheus.CounterOpts{
Namespace: PrometheusNamespace,
Name: "web_num_valid_requests",
Help: "The number of valid Web requests",
})
metrics.NumFunctionalBridges = promauto.NewCounter(prometheus.CounterOpts{
Namespace: PrometheusNamespace,
Name: "num_functional_bridges",
Help: "The number of functional bridges",
})
metrics.NumDysfunctionalBridges = promauto.NewCounter(prometheus.CounterOpts{
Namespace: PrometheusNamespace,
Name: "num_dysfunctional_bridges",
Help: "The number of dysfunctional bridges",
})
buckets := []float64{}
TorTestTimeout.Seconds()
for i := 0.5; i < TorTestTimeout.Seconds(); i *= 2 {
buckets = append(buckets, i)
}
buckets = append(buckets, TorTestTimeout.Seconds())
metrics.TorTestTime = promauto.NewHistogram(prometheus.HistogramOpts{
Namespace: PrometheusNamespace,
Name: "tor_test_time",
Help: "The time it took to finish bridge tests",
Buckets: buckets,
})
}
......@@ -292,6 +292,7 @@ func (c *TorContext) dispatcher() {
select {
case req := <-c.RequestQueue:
log.Printf("%d pending test requests.", len(c.RequestQueue))
metrics.PendingReqs.Set(float64(len(c.RequestQueue)))
start := time.Now()
result := c.TestBridgeLines(req.BridgeLines)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment