Create & Init Project...

This commit is contained in:
2019-04-22 18:49:16 +08:00
commit fc4fa37393
25440 changed files with 4054998 additions and 0 deletions

View File

@@ -0,0 +1,30 @@
package(default_visibility = ["//visibility:public"])
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/main/dapper/agent:all-srcs",
"//app/service/main/dapper/cmd:all-srcs",
"//app/service/main/dapper/collector:all-srcs",
"//app/service/main/dapper/conf:all-srcs",
"//app/service/main/dapper/dao:all-srcs",
"//app/service/main/dapper/model:all-srcs",
"//app/service/main/dapper/pkg/batchwrite:all-srcs",
"//app/service/main/dapper/pkg/collect:all-srcs",
"//app/service/main/dapper/pkg/deliver:all-srcs",
"//app/service/main/dapper/pkg/diskqueue:all-srcs",
"//app/service/main/dapper/pkg/pointwrite:all-srcs",
"//app/service/main/dapper/pkg/process:all-srcs",
"//app/service/main/dapper/pkg/util:all-srcs",
"//app/service/main/dapper/server/udpcollect:all-srcs",
],
tags = ["automanaged"],
)

View File

@@ -0,0 +1,68 @@
### dapper-service
##### Version 3.1.1
>1. 如果没有配置就不启动 kafka collector
##### Version 3.1.0
>1. 添加 kafka collector
>2. 忽略旧 sdk 的 span 采样点
##### Version 3.0.1
>1. 修复 HTTP client operation_name 过多的问题
##### Version 3.0.0
>1. dapper 重构, 重新设计存储格式,添加了 influxdb
##### Version 2.0.4
>1. 允许通过 family 搜索 span
##### Version 2.0.3
>1. depends 接口添加 cache
##### Version 2.0.2
>1. 优化 es 查询, 缩短默认时间范围到1小时避免超时
##### Version 2.0.1
>1. 修复 es mapping 不一致无法查询的问题
##### Version 2.0
>1.dapper 重构
##### Version 1.3.6
>1.优化 collect 日志避免写满磁盘
##### Version 1.3.5
>1. 升级 hbase client
##### Version 1.3.4
>1. 删除无用配置
>2. 接bm
##### Version 1.3.3
>1. 迁移目录
##### Version 1.3.2
>1. 移除statsd 模块
##### Version 1.3.1
>1. 修复循环依赖的接口
##### Version 1.3.0
>1. collect 支持处理聚合日志协议,unixgram 改为 unix
##### Version 1.2.0
>1. 增加没有服务依赖的接口,单独返回
>2. 增加循环依赖服务接口
>3. 增加服务组件依赖接口查询服务最近12小时依赖的组件和服务title
##### Version 1.1.1
> 1.修改服务依赖图数据,没有调用方的服务自己依赖自己
##### Version 1.1.0
> 1.span时间修复
##### Version 1.0.0
> 1.初始化完成dapper服务的基本功能,agent和collect 分别是日志客户端收集器。收集trace发送到dapper

View File

@@ -0,0 +1,13 @@
# Owner
maojian
haoguanwei
weicheng
# Author
haoguanwei
zhoujixiang
weicheng
# Reviewer
maojian
haoguanwei

View File

@@ -0,0 +1,18 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- haoguanwei
- maojian
- weicheng
- zhoujixiang
labels:
- main
- service
- service/main/dapper
options:
no_parent_owners: true
reviewers:
- haoguanwei
- maojian
- weicheng
- zhoujixiang

View File

@@ -0,0 +1,29 @@
# dapper-collector 收集 trace 数据写入 influxdb 与 HBase
### Influxdb 存储格式
| measurement | tags | fields |
|-------------|----------------------------------------------------|-----------------------------------------------|
| span | service_name,operation_name,peer.service,span.kind | max_duration,min_duration,avg_duration,errors |
### HBase 存储格式
dapper:listidx
| rowkey | cf:kind:d:{duration nanosecond} | cf:kind:e:{span_id} |
|-------------------------------------------------------------------|----------------------------------------|--------------------|
| hex(hash({service_name})hex(hash({operation_name}))){timestamp/5} | hex({trace_id}):hex({span_id}) | hex({trace_id}) |
```
create 'dapper:listidx', {NAME=>'kind', VERSIONS=>1, TTL=>604800}
```
dapper:rawtrace
| rowkey | cf:pb:hex({span_id})_{c,s} |
|-----------------|--------------------------------|
| hex({trace_id}) | protobuf({raw_data}) |
```
create 'dapper:rawtrace', {NAME=>'pb', VERSIONS=>1, TTL=>604800}
```

View File

@@ -0,0 +1,37 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["agent.go"],
importpath = "go-common/app/service/main/dapper/agent",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/dapper/conf:go_default_library",
"//app/service/main/dapper/pkg/deliver:go_default_library",
"//app/service/main/dapper/pkg/diskqueue:go_default_library",
"//app/service/main/dapper/server/udpcollect:go_default_library",
"//library/log:go_default_library",
"//library/net/trace/proto:go_default_library",
"@com_github_golang_protobuf//proto:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,145 @@
package agent
import (
"fmt"
"io"
"runtime"
"strings"
"time"
"github.com/golang/protobuf/proto"
"go-common/app/service/main/dapper/conf"
"go-common/app/service/main/dapper/pkg/deliver"
"go-common/app/service/main/dapper/pkg/diskqueue"
"go-common/app/service/main/dapper/server/udpcollect"
"go-common/library/log"
spanpb "go-common/library/net/trace/proto"
)
// Agent dapper collect agent: receives spans over UDP/unixgram,
// buffers them in a disk-backed queue and delivers them to the
// dapper servers.
type Agent struct {
	queue diskqueue.DiskQueue    // disk-backed span buffer
	clt   *udpcollect.UDPCollect // UDP listener feeding the queue
	dlv   *deliver.Deliver       // sender to dapper servers; nil in debug mode
}
// New creates and starts a dapper collect agent: a disk queue for
// buffering, a UDP collect server pushing into the queue, and (unless
// debug is set) a deliver that drains the queue to cfg.Servers.
// In debug mode spans are printed to stdout instead of delivered.
//
// BUGFIX: the original leaked the already-created queue/collect
// server when a later step failed, and returned a non-nil *Agent
// together with a non-nil error.
func New(cfg *conf.AgentConfig, debug bool) (*Agent, error) {
	var options []diskqueue.Option
	if cfg.Queue.BucketBytes != 0 {
		options = append(options, diskqueue.SetBucketByte(cfg.Queue.BucketBytes))
	}
	if cfg.Queue.MemBuckets != 0 {
		options = append(options, diskqueue.SetDynamicMemBucket(cfg.Queue.MemBuckets))
	}
	queue, err := diskqueue.New(cfg.Queue.CacheDir, options...)
	if err != nil {
		return nil, err
	}
	workers := cfg.UDPCollect.Workers
	if workers == 0 {
		// default: one worker per CPU, capped at 4
		if workers = runtime.NumCPU(); workers > 4 {
			workers = 4
		}
	}
	clt, err := udpcollect.New(cfg.UDPCollect.Addr, workers, queue.Push)
	if err != nil {
		queue.Close() // best effort cleanup
		return nil, fmt.Errorf("new udpcollect error: %s", err)
	}
	if err := clt.Start(); err != nil {
		queue.Close() // best effort cleanup
		return nil, err
	}
	ag := &Agent{queue: queue, clt: clt}
	if debug {
		go ag.debugPrinter()
		return ag, nil
	}
	if ag.dlv, err = deliver.New(cfg.Servers, ag.BlockReadFn); err != nil {
		// best effort cleanup of the already-started pieces
		clt.Close()
		queue.Close()
		return nil, err
	}
	return ag, nil
}
// Reload config, only support reload servers config: a new deliver is
// created for the new server list and the old one (if any) is closed.
//
// BUGFIX: when the agent was started in debug mode a.dlv is nil and
// the original unconditionally called a.dlv.Close().
func (a *Agent) Reload(cfg *conf.AgentConfig) error {
	dlv, err := deliver.New(cfg.Servers, a.BlockReadFn)
	if err != nil {
		return err
	}
	if a.dlv != nil {
		if err := a.dlv.Close(); err != nil {
			log.Warn("close old deliver error: %s", err)
		}
	}
	a.dlv = dlv
	return nil
}
// Close shuts the agent down: the UDP collect server first, then the
// deliver (when running), finally the disk queue. Every close error
// is collected and reported together in a single error.
func (a *Agent) Close() error {
	var errs []string
	if err := a.clt.Close(); err != nil {
		errs = append(errs, err.Error())
	}
	if a.dlv != nil {
		if err := a.dlv.Close(); err != nil {
			errs = append(errs, err.Error())
		}
	}
	if err := a.queue.Close(); err != nil {
		errs = append(errs, err.Error())
	}
	if len(errs) > 0 {
		return fmt.Errorf("close agent error: %s", strings.Join(errs, "\n"))
	}
	return nil
}
// BlockReadFn wraps queue.Pop into a blocking read: diskqueue.Pop
// returns io.EOF when the queue is momentarily empty, so we poll
// every 100ms until data arrives or a non-EOF error occurs.
//
// Simplified: the original wrapped the tick receive in a one-case
// select, which is equivalent to a plain channel receive.
func (a *Agent) BlockReadFn() ([]byte, error) {
	tick := time.NewTicker(100 * time.Millisecond)
	defer tick.Stop()
	for {
		data, err := a.queue.Pop()
		if err != io.EOF {
			return data, err
		}
		<-tick.C
	}
}
// debugPrinter continuously pops spans from the queue, decodes and
// prints them to stdout, together with hints about missing standard
// OpenTracing tags. Only used when the agent runs with -debug.
func (a *Agent) debugPrinter() {
	for {
		data, err := a.BlockReadFn()
		if err != nil {
			log.Error("read data error: %s", err)
			continue
		}
		span := new(spanpb.Span)
		if err = proto.Unmarshal(data, span); err != nil {
			log.Error("unmarshal error: %s, data: %s", err, data)
			continue
		}
		fmt.Printf("received span: %s\n", span)
		var hasKind, hasComponent bool
		for _, tag := range span.Tags {
			switch tag.Key {
			case "span.kind":
				hasKind = true
			case "component":
				hasComponent = true
			}
		}
		// BUGFIX: the original hints had no trailing newline, gluing
		// them to the following line of output.
		if !hasKind {
			fmt.Printf("tag span.kind missing, Either \"client\" or \"server\" for the appropriate roles in an RPC, and \"producer\" or \"consumer\" for the appropriate roles in a messaging scenario.\n")
		}
		if !hasComponent {
			fmt.Printf("tag component missing, The software package, framework, library, or module that generated the associated Span. E.g., \"grpc\", \"django\", \"JDBI\".\n")
		}
	}
}

View File

@@ -0,0 +1,45 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
)
go_binary(
name = "cmd",
embed = [":go_default_library"],
tags = ["automanaged"],
)
go_library(
name = "go_default_library",
srcs = ["main.go"],
data = ["dapper-service-example.toml"],
importpath = "go-common/app/service/main/dapper/cmd",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/dapper/collector:go_default_library",
"//app/service/main/dapper/conf:go_default_library",
"//app/service/main/dapper/pkg/util:go_default_library",
"//library/log:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/main/dapper/cmd/agent:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,45 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
)
go_binary(
name = "agent",
embed = [":go_default_library"],
tags = ["automanaged"],
)
go_library(
name = "go_default_library",
srcs = ["main.go"],
data = [
"dapper-agent.toml",
"dapper-agent-example.toml",
],
importpath = "go-common/app/service/main/dapper/cmd/agent",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/dapper/agent:go_default_library",
"//app/service/main/dapper/conf:go_default_library",
"//app/service/main/dapper/pkg/util:go_default_library",
"//library/log:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,12 @@
# uat dapper-service
#servers = ["172.16.38.143:6190"]
servers = ["127.0.0.1:6190"]
[log]
stdout = true
[queue]
cache_dir = "queue-cache"
[udp_collect]
addr = "udp://0.0.0.0:2233"

View File

@@ -0,0 +1,11 @@
# uat dapper-service
servers = ["172.16.38.143:6190"]
[log]
dir = "/data/log/dapper-agent"
[queue]
cache_dir = "/data/log/dapper-collect/data"
[udp_collect]
addr = "unixgram:///var/run/dapper-collect/dapper-collect.sock"

View File

@@ -0,0 +1,52 @@
package main
import (
"flag"
"log"
"os"
"go-common/app/service/main/dapper/agent"
"go-common/app/service/main/dapper/conf"
"go-common/app/service/main/dapper/pkg/util"
xlog "go-common/library/log"
)
// debug, when set via -debug, makes the agent decode received spans
// and print them to stdout instead of delivering them to the servers.
var debug bool

func init() {
	flag.BoolVar(&debug, "debug", false, "debug model decode and print span on stdout")
}
// main boots the dapper agent: load the configuration (local file or
// config center), initialize logging, start the agent, then install
// signal handlers for hot reload and graceful shutdown.
func main() {
	if !flag.Parsed() {
		flag.Parse()
	}
	agentCfg, err := conf.LoadAgentConfig()
	if err != nil {
		log.Fatalf("local agent config error: %s", err)
	}
	xlog.Init(agentCfg.Log)
	defer xlog.Close()
	ag, err := agent.New(agentCfg, debug)
	if err != nil {
		log.Fatalf("new agent service error: %s", err)
	}
	// hot reload of the server list on the reload signal
	util.HandlerReload(func(sig os.Signal) {
		xlog.Warn("receive signal %s, dapper agent reload config", sig)
		newCfg, err := conf.LoadAgentConfig()
		if err != nil {
			xlog.Error("load config error: %s, reload config fail!", err)
			return
		}
		if err := ag.Reload(newCfg); err != nil {
			xlog.Error("reload config error: %s", err)
		}
	})
	// graceful shutdown on the exit signal
	util.HandlerExit(func(sig os.Signal) int {
		if err := ag.Close(); err != nil {
			xlog.Error("close agent error: %s", err)
			return 1
		}
		return 0
	})
}

View File

@@ -0,0 +1,27 @@
[dapper]
retention_day = 7
api_listen = "127.0.0.1:6193"
[log]
stdout = true
v = 10
[hbase]
addrs = "172.22.33.146"
[influx_db]
database = "dapper_uat"
addr = "http://172.22.33.146:8086"
# origin tcp collect
[collect]
network = "tcp"
addr = "127.0.0.1:6190"
[kafka_collect]
topic = "lancer_main_dapper_collector"
addrs = ["172.18.33.163:9092", "172.18.33.164:9092", "172.18.33.165:9092"]
[batch_writer]
# 64 MB buffer
raw_buf_size = 67108864
raw_chan_size = 4096
raw_workers = 5

View File

@@ -0,0 +1,48 @@
package main
import (
"flag"
"log"
"os"
"go-common/app/service/main/dapper/collector"
"go-common/app/service/main/dapper/conf"
"go-common/app/service/main/dapper/pkg/util"
xlog "go-common/library/log"
)
// main boots the dapper collector service: load config (local file or
// config center), initialize logging, start the collector, then wait
// for the exit signal and shut down gracefully.
func main() {
	if !flag.Parsed() {
		flag.Parse()
	}
	// load config file
	if err := conf.Init(); err != nil {
		log.Fatalf("init config error: %s", err)
	}
	// init xlog
	xlog.Init(conf.Conf.Log)
	defer xlog.Close()
	xlog.Info("dapper-service starting")
	// new collector service
	collectorSrv, err := collector.New(conf.Conf)
	if err != nil {
		log.Fatalf("new dapper service error: %s", err)
	}
	if err := collectorSrv.ListenAndStart(); err != nil {
		log.Fatalf("start dapper service error: %s", err)
	}
	util.HandlerExit(func(sig os.Signal) int {
		xlog.Info("dapper-service get a signal %s", sig.String())
		if err := collectorSrv.Close(); err != nil {
			xlog.Info("dapper-service exit, error: %s", err)
			return 1
		}
		xlog.Info("dapper-service exit")
		return 0
	})
}

View File

@@ -0,0 +1,63 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"breaker.go",
"collector.go",
"detect.go",
"operation_name.go",
"process.go",
],
importpath = "go-common/app/service/main/dapper/collector",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/dapper/conf:go_default_library",
"//app/service/main/dapper/dao:go_default_library",
"//app/service/main/dapper/model:go_default_library",
"//app/service/main/dapper/pkg/batchwrite:go_default_library",
"//app/service/main/dapper/pkg/collect:go_default_library",
"//app/service/main/dapper/pkg/collect/kafkacollect:go_default_library",
"//app/service/main/dapper/pkg/collect/tcpcollect:go_default_library",
"//app/service/main/dapper/pkg/pointwrite:go_default_library",
"//library/log:go_default_library",
"//library/net/http/blademaster:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = [
"breaker_test.go",
"collector_test.go",
"detect_test.go",
"operation_name_test.go",
],
embed = [":go_default_library"],
tags = ["automanaged"],
deps = [
"//app/service/main/dapper/model:go_default_library",
"//library/net/trace/proto:go_default_library",
],
)

View File

@@ -0,0 +1,69 @@
package collector
import (
"fmt"
"sync"
"go-common/app/service/main/dapper/model"
)
// countBreaker limits the number of distinct keys it accepts: once n
// distinct keys have been registered, every previously-unseen key is
// rejected, while keys seen before the limit keep passing. Used to
// bound per-service tag cardinality.
type countBreaker struct {
	rmx  sync.RWMutex
	n    int                 // maximum number of distinct keys
	slot map[string]struct{} // keys seen so far
}

// Break registers key, returning an error for new keys once the
// distinct-key limit is reached.
//
// BUGFIX: the original took the write lock for the read-only fast
// path, and inserted the key into the map even when over the limit —
// so the very next call with the same "breaked" key succeeded,
// defeating the limit entirely.
func (c *countBreaker) Break(key string) error {
	c.rmx.RLock()
	_, ok := c.slot[key]
	c.rmx.RUnlock()
	if ok {
		return nil
	}
	c.rmx.Lock()
	defer c.rmx.Unlock()
	// re-check under the write lock: another goroutine may have
	// inserted the key between RUnlock and Lock
	if _, ok := c.slot[key]; ok {
		return nil
	}
	if len(c.slot) >= c.n {
		// do NOT record the key: recording it would let it pass on
		// the next call
		return fmt.Errorf("%s reach limit number %d breaked", key, c.n)
	}
	c.slot[key] = struct{}{}
	return nil
}

func newCountBreaker(n int) *countBreaker {
	return &countBreaker{n: n, slot: make(map[string]struct{})}
}
// serviceBreaker applies per-service cardinality limits: each service
// gets one countBreaker for operation names ("_o" slot) and one for
// peer services ("_p" slot), each capped at n distinct values.
type serviceBreaker struct {
	rmx  sync.RWMutex
	n    int
	slot map[string]*countBreaker
}

// Process rejects spans whose service has emitted too many distinct
// operation names or peer services.
//
// BUGFIX: the original re-used the stale ok1/ok2 values inside the
// write lock, so two racing goroutines could overwrite an existing
// breaker and reset its accumulated state; re-check under the lock.
func (s *serviceBreaker) Process(span *model.Span) error {
	oKey := span.ServiceName + "_o"
	pKey := span.ServiceName + "_p"
	s.rmx.RLock()
	operationNameBreaker, ok1 := s.slot[oKey]
	peerServiceBreaker, ok2 := s.slot[pKey]
	s.rmx.RUnlock()
	if !ok1 || !ok2 {
		s.rmx.Lock()
		if operationNameBreaker, ok1 = s.slot[oKey]; !ok1 {
			operationNameBreaker = newCountBreaker(s.n)
			s.slot[oKey] = operationNameBreaker
		}
		if peerServiceBreaker, ok2 = s.slot[pKey]; !ok2 {
			peerServiceBreaker = newCountBreaker(s.n)
			s.slot[pKey] = peerServiceBreaker
		}
		s.rmx.Unlock()
	}
	if err := operationNameBreaker.Break(span.OperationName); err != nil {
		return err
	}
	return peerServiceBreaker.Break(span.StringTag("peer.service"))
}

// NewServiceBreakerProcess creates a per-service cardinality breaker
// allowing at most n distinct operation names / peer services each.
func NewServiceBreakerProcess(n int) Processer {
	return &serviceBreaker{n: n, slot: make(map[string]*countBreaker)}
}

View File

@@ -0,0 +1,24 @@
package collector
import (
"fmt"
"testing"
"go-common/app/service/main/dapper/model"
)
// TestServiceBreaker verifies that one service may register at most n
// distinct operation names before the breaker rejects further spans.
func TestServiceBreaker(t *testing.T) {
	breaker := NewServiceBreakerProcess(10)
	for i := 0; i < 20; i++ {
		err := breaker.Process(&model.Span{ServiceName: "test", OperationName: fmt.Sprintf("opt_%d", i)})
		if i < 10 {
			// the first 10 distinct operation names must pass
			if err != nil {
				t.Error(err)
			}
		} else {
			// any operation name beyond the limit must be rejected
			if err == nil {
				t.Error("expect breaked")
			}
		}
	}
}

View File

@@ -0,0 +1,195 @@
package collector
import (
"context"
"fmt"
"time"
"go-common/app/service/main/dapper/conf"
"go-common/app/service/main/dapper/dao"
"go-common/app/service/main/dapper/model"
"go-common/app/service/main/dapper/pkg/batchwrite"
"go-common/app/service/main/dapper/pkg/collect"
"go-common/app/service/main/dapper/pkg/collect/kafkacollect"
"go-common/app/service/main/dapper/pkg/collect/tcpcollect"
"go-common/app/service/main/dapper/pkg/pointwrite"
"go-common/library/log"
bm "go-common/library/net/http/blademaster"
)
// Collector dapper collector receiving trace data from tcp (and
// optionally kafka) and writing it to HBase and InfluxDB.
type Collector struct {
	daoImpl dao.Dao                // storage access (HBase + InfluxDB)
	cfg     *conf.Config
	bw      batchwrite.BatchWriter // raw-trace batch writer
	pw      pointwrite.PointWriter // sample-point writer
	process []Processer            // span processers, run in order
	clts    []collect.Collecter    // all running collect servers
	tcpClt  *tcpcollect.TCPCollect // kept separately for client-status reporting
}
// checkRetention reports an error for spans that started longer ago
// than the configured retention window; RetentionDay == 0 disables
// the check entirely.
func (c *Collector) checkRetention(span *model.Span) error {
	if c.cfg.Dapper.RetentionDay == 0 {
		return nil
	}
	retentionSecond := c.cfg.Dapper.RetentionDay * 3600 * 24
	if (time.Now().Unix() - span.StartTime.Unix()) > int64(retentionSecond) {
		return fmt.Errorf("span beyond retention policy ignored")
	}
	return nil
}
// Process dispatches a protoSpan: validate it, convert it to a
// model.Span, run the registered processers and hand the result to
// the sample-point and raw-trace writers. Write failures are logged
// but do not fail the span.
//
// BUGFIX: the original used `if c.writeSamplePoint(span); err != nil`
// (twice), which discards the call's error and tests a stale, always-
// nil variable — so storage-write errors were silently dropped. The
// duplicate empty-serviceName check and the copy-pasted raw-trace log
// message are also fixed.
func (c *Collector) Process(ctx context.Context, protoSpan *model.ProtoSpan) error {
	log.V(5).Info("dispatch span form serviceName: %s, operationName: %s", protoSpan.ServiceName, protoSpan.OperationName)
	// ignored serviceName empty span
	if protoSpan.ServiceName == "" {
		log.Warn("span miss servicename ignored")
		return nil
	}
	// fix operationName
	if protoSpan.OperationName == "" {
		protoSpan.OperationName = "missing operationname !!"
	}
	// convert to model.Span
	span, err := model.FromProtoSpan(protoSpan, false)
	if err != nil {
		return err
	}
	// run process
	for _, p := range c.process {
		if err := p.Process(span); err != nil {
			return err
		}
	}
	if err := c.writeSamplePoint(span); err != nil {
		log.Error("write sample point error: %s", err)
	}
	if err := c.writeRawTrace(span); err != nil {
		log.Error("write raw trace error: %s", err)
	}
	return nil
}
// writeSamplePoint forwards the span to the point writer (InfluxDB).
func (c *Collector) writeSamplePoint(span *model.Span) error {
	return c.pw.WriteSpan(span)
}

// writeRawTrace forwards the span to the raw-trace batch writer (HBase).
func (c *Collector) writeRawTrace(span *model.Span) error {
	return c.bw.WriteSpan(span)
}
// ListenAndStart starts every configured collect server (TCP always,
// Kafka only when configured) and, if api_listen is set, the internal
// status HTTP API. Each collect server is registered with this
// Collector as its span processer before being started.
func (c *Collector) ListenAndStart() error {
	tcpClt := tcpcollect.New(c.cfg.Collect)
	// NOTE: remove this future
	// kept separately because clientStatusHandler reports TCP-client
	// status only
	c.tcpClt = tcpClt
	c.clts = append(c.clts, tcpClt)
	// if KafkaCollect is configured enable kafka collect
	if c.cfg.KafkaCollect != nil {
		kafkaClt, err := kafkacollect.New(c.cfg.KafkaCollect.Topic, c.cfg.KafkaCollect.Addrs)
		if err != nil {
			return err
		}
		c.clts = append(c.clts, kafkaClt)
	}
	for _, clt := range c.clts {
		clt.RegisterProcess(c)
		if err := clt.Start(); err != nil {
			return err
		}
	}
	if c.cfg.Dapper.APIListen != "" {
		c.listenAndStartAPI()
	}
	return nil
}
// listenAndStartAPI starts the internal status HTTP API exposing
// /x/internal/dapper-collector/client-status.
//
// BUGFIX: the original discarded engine.Start()'s error entirely, so
// a failed bind went unnoticed.
// NOTE(review): the api_listen address from config is checked by the
// caller but never passed to bm.NewServer(nil) — verify the server's
// default listen address matches expectations.
func (c *Collector) listenAndStartAPI() {
	engine := bm.NewServer(nil)
	engine.GET("/x/internal/dapper-collector/client-status", c.clientStatusHandler)
	if err := engine.Start(); err != nil {
		log.Error("start status API server error: %s", err)
	}
}
// Close stops every collect server, then flushes and closes both
// storage writers. Collect-server close errors are logged rather than
// returned so every component still gets a chance to shut down.
func (c *Collector) Close() error {
	for _, clt := range c.clts {
		if err := clt.Close(); err != nil {
			log.Error("close collect error: %s", err)
		}
	}
	c.bw.Close()
	c.pw.Close()
	return nil
}
// clientStatusHandler serves the internal status API: the raw-write
// queue length plus, for every connected TCP client, its address,
// uptime, error count and receive rate.
func (c *Collector) clientStatusHandler(bc *bm.Context) {
	resp := &model.ClientStatusResp{QueueLen: c.bw.QueueLen()}
	for _, cs := range c.tcpClt.ClientStatus() {
		resp.Clients = append(resp.Clients, &model.ClientStatus{
			Addr:     cs.Addr,
			UpTime:   cs.UpTime,
			ErrCount: cs.ErrorCounter.Value(),
			Rate:     cs.Counter.Value(),
		})
	}
	bc.JSON(resp, nil)
}
// New creates the dapper collector: it builds the dao, the raw-trace
// batch writer and the sample-point writer, pre-loads the known
// service/operation names to seed peer-service detection, and
// registers the span processers (operation-name fixer, service
// breaker, peer-service detector).
//
// BUGFIX: on FetchServiceName failure the original returned
// `nil, nil`, silently handing the caller a nil *Collector without an
// error; it now returns the error.
func New(cfg *conf.Config) (*Collector, error) {
	daoImpl, err := dao.New(cfg)
	if err != nil {
		return nil, err
	}
	bw := batchwrite.NewRawDataBatchWriter(
		daoImpl.WriteRawTrace,
		cfg.BatchWriter.RawBufSize,
		cfg.BatchWriter.RawChanSize,
		cfg.BatchWriter.RawWorkers,
		0,
	)
	detectData := make(map[string]map[string]struct{})
	serviceNames, err := daoImpl.FetchServiceName(context.Background())
	if err != nil {
		return nil, err
	}
	log.V(10).Info("fetch serviceNames get %d", len(serviceNames))
	for _, serviceName := range serviceNames {
		operationNames, err := daoImpl.FetchOperationName(context.Background(), serviceName)
		if err != nil {
			// best effort: detection still works for other services
			log.Error("fetch operationName for %s error: %s", serviceName, err)
			continue
		}
		log.V(10).Info("fetch operationName for %s get %d", serviceName, len(operationNames))
		detectData[serviceName] = make(map[string]struct{})
		for _, operationName := range operationNames {
			detectData[serviceName][operationName] = struct{}{}
		}
	}
	// TODO configable
	pw := pointwrite.New(daoImpl.BatchWriteSpanPoint, 5, 10*time.Second)
	// register processer
	var process []Processer
	process = append(process, NewOperationNameProcess())
	// FIXME: breaker not work
	process = append(process, NewServiceBreakerProcess(4096))
	process = append(process, NewPeerServiceDetectProcesser(detectData))
	return &Collector{
		cfg:     cfg,
		daoImpl: daoImpl,
		bw:      bw,
		pw:      pw,
		process: process,
	}, nil
}

View File

@@ -0,0 +1 @@
package collector

View File

@@ -0,0 +1,73 @@
package collector
import (
"sync"
"go-common/app/service/main/dapper/model"
)
// peerServiceDetect remembers which service registered each
// (server-side) operation name so that client spans missing a
// peer.service tag can have it inferred later.
type peerServiceDetect struct {
	rmx  sync.RWMutex
	pair map[string]string // operationName -> owning serviceName
}

// detect returns the service previously registered for operationName.
func (p *peerServiceDetect) detect(operationName string) (string, bool) {
	p.rmx.RLock()
	defer p.rmx.RUnlock()
	serviceName, ok := p.pair[operationName]
	return serviceName, ok
}

// add registers serviceName as the owner of operationName. Empty
// values are ignored, and an already-identical mapping is left alone
// to avoid taking the write lock.
func (p *peerServiceDetect) add(serviceName, operationName string) {
	if serviceName == "" || operationName == "" {
		// ignored empty
		return
	}
	current, ok := p.detect(operationName)
	if ok && current == serviceName {
		return
	}
	p.rmx.Lock()
	p.pair[operationName] = serviceName
	p.rmx.Unlock()
}
// process fills in the peer.service tag for client spans. Server
// spans instead register their serviceName/operationName pair so
// future client spans can be matched against it.
func (p *peerServiceDetect) process(span *model.Span) {
	if span.IsServer() {
		p.add(span.ServiceName, span.OperationName)
		return
	}
	// client span: nothing to do when peer.service is already set
	if span.GetTagString("peer.service") != "" {
		return
	}
	// first try the exact operation name...
	peerService, ok := p.detect(span.OperationName)
	if ok {
		span.SetTag("peer.service", peerService)
		// mark the tag as auto-detected rather than SDK-provided
		span.SetTag("_auto.peer.service", true)
		return
	}
	// ...then fall back to the normalized _peer.sign (e.g. the URL
	// path extracted by OperationNameProcess)
	if peerSign := span.StringTag("_peer.sign"); peerSign != "" {
		peerService, ok := p.detect(peerSign)
		if ok {
			span.SetTag("peer.service", peerService)
			span.SetTag("_auto.peer.service", true)
		}
	}
}

// Process implements Processer; detection never fails a span.
func (p *peerServiceDetect) Process(span *model.Span) error {
	p.process(span)
	return nil
}
// NewPeerServiceDetectProcesser builds a peer-service detector
// pre-seeded from a serviceName -> set-of-operationNames mapping
// (typically loaded from storage at startup).
func NewPeerServiceDetectProcesser(data map[string]map[string]struct{}) Processer {
	detect := &peerServiceDetect{pair: make(map[string]string)}
	for serviceName, operationNames := range data {
		for operationName := range operationNames {
			detect.add(serviceName, operationName)
		}
	}
	return detect
}

View File

@@ -0,0 +1,26 @@
package collector
import (
"testing"
"go-common/app/service/main/dapper/model"
)
// TestPeerServiceDetect verifies that a client span whose operation
// name was registered for a service gets peer.service auto-filled,
// and that spans with empty operation names pass through untouched.
func TestPeerServiceDetect(t *testing.T) {
	detect := NewPeerServiceDetectProcesser(map[string]map[string]struct{}{
		"s_a": {
			"o_1":  struct{}{},
			"o_11": struct{}{},
		},
		"s_b": {"o_2": struct{}{}},
		"s_c": {"o_21": struct{}{}},
	})
	sp1, _ := model.FromProtoSpan(&model.ProtoSpan{ServiceName: "xxx", OperationName: "o_1"}, false)
	detect.Process(sp1)
	// BUGFIX: the original condition used `&&`, so it could never
	// fire when the peer.service tag was missing entirely; it must
	// fail when EITHER the tag is wrong OR the auto marker is unset.
	if sp1.StringTag("peer.service") != "s_a" || !sp1.BoolTag("_auto.peer.service") {
		t.Errorf("expect get s_a get %s", sp1.StringTag("peer.service"))
	}
	if err := detect.Process(&model.Span{ServiceName: "hh", OperationName: ""}); err != nil {
		t.Errorf("expect get noting")
	}
}

View File

@@ -0,0 +1,46 @@
package collector
import (
"net/url"
"strings"
)
import (
"go-common/app/service/main/dapper/model"
)
// OperationNameProcess rewrites pathological operation names (raw
// HTTP URLs on client spans) to keep operation-name cardinality
// bounded.
type OperationNameProcess struct{}

// Process implements Processer: client spans whose operation name is
// a raw http:// URL are normalized via fixHTTP; everything else is
// left untouched.
func (o *OperationNameProcess) Process(span *model.Span) error {
	if !span.IsServer() && strings.HasPrefix(span.OperationName, "http://") {
		o.fixHTTP(span)
	}
	return nil
}
// fixHTTP rewrites a client HTTP span whose operation name is a full
// URL: the operation name becomes "HTTP:<method>", the original URL
// is preserved in the http.url tag, and the URL path is stored in
// _peer.sign so the peer-service detector can match it later.
func (o *OperationNameProcess) fixHTTP(span *model.Span) {
	oldOperationName := span.OperationName
	// NOTE(review): "UNKONWN" is a typo for "UNKNOWN", but the test
	// suite (and possibly dashboards) match the literal value — do
	// not change it without coordinating the rename.
	method := "UNKONWN"
	if methodTag := span.GetTagString("http.method"); methodTag != "" {
		method = methodTag
	}
	operationName := "HTTP:" + method
	span.SetOperationName(operationName)
	peerSign := oldOperationName
	if strings.HasPrefix(oldOperationName, "http://") {
		if reqURL, err := url.Parse(oldOperationName); err == nil {
			peerSign = reqURL.Path
			span.SetTag("http.url", oldOperationName)
		}
	}
	span.SetTag("_peer.sign", peerSign)
}
// NewOperationNameProcess creates the operation-name fixing processer.
func NewOperationNameProcess() Processer {
	return &OperationNameProcess{}
}

View File

@@ -0,0 +1,27 @@
package collector
import (
"testing"
"go-common/app/service/main/dapper/model"
protogen "go-common/library/net/trace/proto"
)
// TestOperationNameProcess verifies that a client HTTP span with a
// raw URL operation name is rewritten to "HTTP:<method>" while the
// URL and its path are preserved in http.url and _peer.sign.
func TestOperationNameProcess(t *testing.T) {
	p := NewOperationNameProcess()
	sp1, _ := model.FromProtoSpan(&model.ProtoSpan{
		ServiceName:   "http",
		OperationName: "http://www.example.com/echo",
		Tags:          []*protogen.Tag{&protogen.Tag{Key: "span.kind", Kind: protogen.Tag_STRING, Value: []byte("client")}},
	}, false)
	p.Process(sp1)
	// no http.method tag set, so the method falls back to "UNKONWN"
	// (intentional literal; matches the production typo)
	if sp1.OperationName != "HTTP:UNKONWN" || sp1.ProtoSpan.OperationName != "HTTP:UNKONWN" {
		t.Errorf("expect operationName == , get %s %s", sp1.OperationName, sp1.ProtoSpan.OperationName)
	}
	if sp1.StringTag("http.url") != "http://www.example.com/echo" {
		t.Errorf("expect http.url be set")
	}
	if sp1.StringTag("_peer.sign") != "/echo" {
		t.Errorf("expect _peer.sign be set")
	}
}

View File

@@ -0,0 +1,16 @@
package collector
import (
"go-common/app/service/main/dapper/model"
)
// Processer is a span processer run by the Collector on every span
// before it is written to storage; returning an error drops the span.
type Processer interface {
	Process(span *model.Span) error
}

// ProcessFunc adapts a plain function to the Processer interface.
type ProcessFunc func(*model.Span) error

// Process implements Processer by calling p itself.
func (p ProcessFunc) Process(span *model.Span) error { return p(span) }

View File

@@ -0,0 +1,37 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = [
"agentconfig.go",
"config.go",
],
importpath = "go-common/app/service/main/dapper/conf",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//library/conf:go_default_library",
"//library/log:go_default_library",
"//library/time:go_default_library",
"//vendor/github.com/BurntSushi/toml:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,68 @@
package conf
import (
"fmt"
"github.com/BurntSushi/toml"
"go-common/library/conf"
"go-common/library/log"
)
const (
_agentConfigKey = "dapper-agent.toml"
)
// LoadAgentConfig loads the agent configuration from the -conf file
// when given, otherwise from the remote config center.
func LoadAgentConfig() (*AgentConfig, error) {
	if confPath != "" {
		cfg := new(AgentConfig)
		_, err := toml.DecodeFile(confPath, cfg)
		return cfg, err
	}
	return remoteAgentConfig()
}
// AgentConfig config for dapper agent.
type AgentConfig struct {
	Servers    []string         `toml:"servers"` // dapper-service addresses spans are delivered to
	Log        *log.Config      `toml:"log"`
	Queue      *QueueConfig     `toml:"queue"`       // local disk-queue settings
	UDPCollect UDPCollectConfig `toml:"udp_collect"` // UDP/unixgram listener settings
}

// QueueConfig internal queue config.
type QueueConfig struct {
	// queue local stroage path
	MemBuckets  int    `toml:"mem_buckets"`  // number of in-memory buckets; 0 uses the queue default
	BucketBytes int    `toml:"bucket_bytes"` // bytes per bucket; 0 uses the queue default
	CacheDir    string `toml:"cache_dir"`    // directory for on-disk spill
}

// UDPCollectConfig collect config.
type UDPCollectConfig struct {
	Workers int    `toml:"workers"` // 0 means NumCPU capped at 4
	Addr    string `toml:"addr"`    // e.g. "udp://0.0.0.0:2233" or "unixgram:///path.sock"
}
// remoteAgentConfig fetches and decodes the agent config from the
// config center. Config-change events are drained but ignored: the
// agent reloads explicitly via its reload signal instead.
func remoteAgentConfig() (*AgentConfig, error) {
	client, err := conf.New()
	if err != nil {
		return nil, fmt.Errorf("new config center client error: %s", err)
	}
	data, ok := client.Value2(_agentConfigKey)
	if !ok {
		return nil, fmt.Errorf("load config center error key %s not found", _agentConfigKey)
	}
	cfg := new(AgentConfig)
	_, err = toml.Decode(data, cfg)
	if err != nil {
		return nil, fmt.Errorf("could not decode config file %s, error: %s", _agentConfigKey, err)
	}
	go func() {
		// the event channel must be drained so the client does not
		// back up; change events are intentionally ignored here
		for range client.Event() {
			// ignore config change event
		}
	}()
	return cfg, nil
}

View File

@@ -0,0 +1,129 @@
package conf
import (
"errors"
"flag"
"github.com/BurntSushi/toml"
"go-common/library/conf"
"go-common/library/log"
xtime "go-common/library/time"
)
func init() {
	// -conf: path to a local config file; when empty the config is
	// fetched from the remote config center instead.
	flag.StringVar(&confPath, "conf", "", "config file")
}

var (
	confPath string
	// Conf is the global collector configuration, populated by Init
	// and refreshed on config-center change events.
	Conf = &Config{}
	// client is the config-center client (remote mode only).
	client *conf.Client
)
// Config is the dapper collector configuration.
type Config struct {
	Log          *log.Config     `toml:"log"`
	HBase        *HBaseConfig    `toml:"hbase"`
	InfluxDB     *InfluxDBConfig `toml:"influx_db"`
	Collect      *Collect        `toml:"collect"` // TCP collect server
	BatchWriter  *BatchWriter    `toml:"batch_writer"`
	Dapper       *DapperConfig   `toml:"dapper"`
	KafkaCollect *KafkaCollect   `toml:"kafka_collect"` // optional; nil disables kafka collect
}

// DapperConfig top-level collector options.
type DapperConfig struct {
	RetentionDay int    `toml:"retention_day"` // drop spans older than this many days; 0 disables
	APIListen    string `toml:"api_listen"`    // status API listen address; empty disables the API
}

// HBaseConfig hbase config.
type HBaseConfig struct {
	Namespace           string         `toml:"namespace"` // dao falls back to a default when empty
	Addrs               string         `toml:"addrs"`
	RPCQueueSize        int            `toml:"rpc_queue_size"`
	FlushInterval       xtime.Duration `toml:"flush_interval"`
	EffectiveUser       string         `toml:"effective_user"`
	RegionLookupTimeout xtime.Duration `toml:"region_lookup_timeout"`
	RegionReadTimeout   xtime.Duration `toml:"region_read_timeout"`
}

// InfluxDBConfig influxdb connection config.
type InfluxDBConfig struct {
	Addr     string `toml:"addr"`
	Username string `toml:"username"`
	Password string `toml:"password"`
	Database string `toml:"database"` // dao falls back to a default when empty
}

// Collect is the TCP collect server config.
type Collect struct {
	Network string
	Addr    string
}

// KafkaCollect is the kafka collect server config.
type KafkaCollect struct {
	Topic string   `toml:"topic"`
	Addrs []string `toml:"addrs"`
}

// BatchWriter sizes the storage writers.
type BatchWriter struct {
	SummaryWorkers  int            `toml:"summary_workers"`
	SummaryBulkSize int            `toml:"summary_bulk_size"`
	SummaryChanSize int            `toml:"summary_chan_size"`
	RawWorkers      int            `toml:"raw_workers"`
	RawBufSize      int            `toml:"raw_buf_size"`
	RawChanSize     int            `toml:"raw_chan_size"`
	FlushInterval   xtime.Duration `toml:"flush_interval"`
}
// Init loads the collector config from the -conf file when given,
// otherwise from the remote config center (with live reload).
func Init() (err error) {
	if confPath != "" {
		return local()
	}
	return remote()
}

// local decodes the config from the local -conf file into Conf.
func local() (err error) {
	_, err = toml.DecodeFile(confPath, &Conf)
	return
}
// remote loads the config from the config center once, then keeps
// reloading it in a background goroutine on every change event.
func remote() (err error) {
	if client, err = conf.New(); err != nil {
		return
	}
	if err = load(); err != nil {
		return
	}
	go func() {
		for range client.Event() {
			log.Info("config reload")
			// a failed reload keeps the previous Conf in place
			if err := load(); err != nil {
				log.Error("config reload error (%v)", err)
			}
		}
	}()
	return
}
// load fetches "dapper-service.toml" from the config center, decodes
// it into a temporary value and only then replaces the global Conf,
// so a bad payload never leaves Conf half-updated.
func load() (err error) {
	var (
		s       string
		ok      bool
		tmpConf *Config
	)
	if s, ok = client.Value2("dapper-service.toml"); !ok {
		return errors.New("load config center error")
	}
	// NOTE(review): the decode error detail is swallowed here and
	// replaced with a generic message — consider surfacing it for
	// debuggability.
	if _, err = toml.Decode(s, &tmpConf); err != nil {
		return errors.New("could not decode config")
	}
	*Conf = *tmpConf
	return
}

View File

@@ -0,0 +1,50 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_test",
"go_library",
)
go_test(
name = "go_default_test",
srcs = ["dao_test.go"],
embed = [":go_default_library"],
tags = ["automanaged"],
deps = [
"//app/service/main/dapper/conf:go_default_library",
"//app/service/main/dapper/model:go_default_library",
"//vendor/github.com/smartystreets/goconvey/convey:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = ["dao.go"],
importpath = "go-common/app/service/main/dapper/dao",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/dapper/conf:go_default_library",
"//app/service/main/dapper/model:go_default_library",
"//library/log:go_default_library",
"//vendor/github.com/dgryski/go-farm:go_default_library",
"//vendor/github.com/influxdata/influxdb/client/v2:go_default_library",
"//vendor/github.com/tsuna/gohbase:go_default_library",
"//vendor/github.com/tsuna/gohbase/hrpc:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,220 @@
package dao
import (
"context"
"fmt"
"strconv"
"strings"
"time"
"github.com/dgryski/go-farm"
influxdb "github.com/influxdata/influxdb/client/v2"
"github.com/tsuna/gohbase"
"github.com/tsuna/gohbase/hrpc"
"go-common/app/service/main/dapper/conf"
"go-common/app/service/main/dapper/model"
"go-common/library/log"
)
const (
	// defaults used when the corresponding config fields are empty.
	defaultHbaseNameSpace = "ugc"
	defaultInfluxDatabase = "dapper"
	// HBase tables and their column families.
	hbaseRawTraceTable  = "DapperRawtrace"
	hbaseRawTraceFamily = "pb"
	hbaseListIdxTable   = "DapperListidx"
	hbaseListIdxFamily  = "kind"
	// InfluxDB tag keys.
	serviceNameTag   = "service_name"
	operationNameTag = "operation_name"
	peerServiceTag   = "peer.service"
	spanKindTag      = "span.kind"
	// InfluxDB field keys and the measurement name.
	maxDurationField     = "max_duration"
	minDurationField     = "min_duration"
	avgDurationField     = "avg_duration"
	spanpointMeasurement = "span_point"
	errorsField          = "errors"
)
// Dao is the storage interface: raw traces and a sample-point index live in
// HBase, while aggregated span points go to InfluxDB.
type Dao interface {
	// WriteRawTrace writes one trace's span cells to the HBase raw-trace table.
	WriteRawTrace(ctx context.Context, rowKey string, values map[string][]byte) error
	// BatchWriteSpanPoint writes span points to both the HBase sample-point
	// index and InfluxDB.
	BatchWriteSpanPoint(ctx context.Context, spanPoints []*model.SpanPoint) error
	// FetchServiceName lists all known service names from InfluxDB.
	FetchServiceName(ctx context.Context) ([]string, error)
	// FetchOperationName lists server-side operation names of one service.
	FetchOperationName(ctx context.Context, serviceName string) ([]string, error)
}
// New builds the default Dao backed by HBase and InfluxDB.
// Fallbacks when unset in config: HBase namespace "ugc", Influx database "dapper".
func New(cfg *conf.Config) (Dao, error) {
	// disable rpc queue
	hbaseClient := gohbase.NewClient(cfg.HBase.Addrs, gohbase.RpcQueueSize(0))
	hbaseNameSpace := defaultHbaseNameSpace
	if cfg.HBase.Namespace != "" {
		hbaseNameSpace = cfg.HBase.Namespace
	}
	influxdbCfg := influxdb.HTTPConfig{Addr: cfg.InfluxDB.Addr, Username: cfg.InfluxDB.Username, Password: cfg.InfluxDB.Password}
	influxdbClient, err := influxdb.NewHTTPClient(influxdbCfg)
	if err != nil {
		return nil, err
	}
	influxDatabase := defaultInfluxDatabase
	if cfg.InfluxDB.Database != "" {
		influxDatabase = cfg.InfluxDB.Database
	}
	return &dao{
		hbaseNameSpace: hbaseNameSpace,
		hbaseClient:    hbaseClient,
		influxDatabase: influxDatabase,
		influxdbClient: influxdbClient,
	}, nil
}
// compile-time check that *dao satisfies Dao.
var _ Dao = &dao{}

// dao is the concrete Dao holding one HBase client and one InfluxDB client.
type dao struct {
	hbaseNameSpace string
	hbaseClient    gohbase.Client
	influxDatabase string
	influxdbClient influxdb.Client
}
// WriteRawTrace stores one trace's raw span cells into the HBase raw-trace
// table under the given row key (all cells in the "pb" family).
func (d *dao) WriteRawTrace(ctx context.Context, rowKey string, values map[string][]byte) error {
	put, err := hrpc.NewPutStr(
		ctx,
		d.hbaseNameSpace+":"+hbaseRawTraceTable,
		rowKey,
		map[string]map[string][]byte{hbaseRawTraceFamily: values},
	)
	if err != nil {
		return err
	}
	if _, err = d.hbaseClient.Put(put); err != nil {
		return err
	}
	return nil
}
// BatchWriteSpanPoint writes every span point to the HBase sample-point index
// and, as one batch, to InfluxDB. Per-point errors are collected so one bad
// point does not stop the rest; the joined messages are returned at the end.
func (d *dao) BatchWriteSpanPoint(ctx context.Context, spanPoints []*model.SpanPoint) error {
	var messages []string
	// NOTE(review): influxdb client precision values are "s", "ms", "ns", ... —
	// confirm "1s" is accepted by the server or change to "s".
	batchPoint, err := influxdb.NewBatchPoints(influxdb.BatchPointsConfig{Database: d.influxDatabase, Precision: "1s"})
	if err != nil {
		return err
	}
	for _, spanPoint := range spanPoints {
		if err := d.writeSamplePoint(ctx, spanPoint); err != nil {
			messages = append(messages, err.Error())
		}
		if point, err := toInfluxDBPoint(spanPoint); err != nil {
			messages = append(messages, err.Error())
		} else {
			batchPoint.AddPoint(point)
		}
	}
	if err := d.influxdbClient.Write(batchPoint); err != nil {
		messages = append(messages, err.Error())
	}
	if len(messages) != 0 {
		return fmt.Errorf("%s", strings.Join(messages, "\n"))
	}
	return nil
}
// FetchServiceName returns all distinct service_name tag values recorded in
// the span_point measurement; an empty (non-nil) slice when none exist.
func (d *dao) FetchServiceName(ctx context.Context) ([]string, error) {
	command := fmt.Sprintf("SHOW TAG VALUES FROM span_point WITH KEY = %s", serviceNameTag)
	log.V(10).Info("query command %s", command)
	query := influxdb.NewQuery(command, d.influxDatabase, "1s")
	resp, err := d.influxdbClient.Query(query)
	if err != nil {
		return nil, err
	}
	if len(resp.Results) == 0 || len(resp.Results[0].Series) == 0 {
		return make([]string, 0), nil
	}
	rows := resp.Results[0].Series[0]
	serviceNames := make([]string, 0, len(rows.Values))
	for _, kv := range rows.Values {
		// each row is a (key, value) pair; anything else is skipped.
		if len(kv) != 2 {
			continue
		}
		if serviceName, ok := kv[1].(string); ok {
			serviceNames = append(serviceNames, serviceName)
		}
	}
	return serviceNames, nil
}
// FetchOperationName returns the distinct operation_name values of one
// service, restricted to span.kind = "server" rows.
// NOTE(review): serviceName is interpolated into the InfluxQL string without
// escaping — injection risk if it can ever come from untrusted input; confirm
// callers only pass names previously read from the database.
func (d *dao) FetchOperationName(ctx context.Context, serviceName string) ([]string, error) {
	command := fmt.Sprintf("SHOW TAG VALUES FROM %s WITH KEY = %s WHERE %s = '%s' AND %s = '%s'",
		spanpointMeasurement, operationNameTag, serviceNameTag, serviceName, spanKindTag, "server")
	log.V(10).Info("query command %s", command)
	query := influxdb.NewQuery(command, d.influxDatabase, "1s")
	resp, err := d.influxdbClient.Query(query)
	if err != nil {
		return nil, err
	}
	if len(resp.Results) == 0 || len(resp.Results[0].Series) == 0 {
		return make([]string, 0), nil
	}
	rows := resp.Results[0].Series[0]
	operationNames := make([]string, 0, len(rows.Values))
	for _, kv := range rows.Values {
		// each row is a (key, value) pair; anything else is skipped.
		if len(kv) != 2 {
			continue
		}
		if operationName, ok := kv[1].(string); ok {
			operationNames = append(operationNames, operationName)
		}
	}
	return operationNames, nil
}
// writeSamplePoint stores one span point's duration and error sample cells in
// the HBase list-index table so sample spans can be looked up later.
func (d *dao) writeSamplePoint(ctx context.Context, spanPoint *model.SpanPoint) error {
	table := d.hbaseNameSpace + ":" + hbaseListIdxTable
	rowKey := listIdxKey(spanPoint)
	values := make(map[string][]byte)
	values = fuelDurationSamplePoint(values, spanPoint.MaxDuration, spanPoint.AvgDuration, spanPoint.MinDuration)
	values = fuelErrrorSamplePoint(values, spanPoint.Errors...)
	put, err := hrpc.NewPutStr(ctx, table, rowKey, map[string]map[string][]byte{hbaseListIdxFamily: values})
	if err != nil {
		return err
	}
	_, err = d.hbaseClient.Put(put)
	return err
}
// listIdxKey builds the HBase row key for a span point:
// hex(hash32(service)) + hex(hash32(operation)) + decimal timestamp.
// NOTE(review): %x is not zero-padded, so different hash pairs can produce
// the same concatenation (e.g. "ab"+"cdef" vs "abcd"+"ef"); confirm the
// reader side builds keys the same way, or pad to %08x on both sides.
func listIdxKey(spanPoint *model.SpanPoint) string {
	serviceNameHash := farm.Hash32([]byte(spanPoint.ServiceName))
	operationNameHash := farm.Hash32([]byte(spanPoint.OperationName))
	return fmt.Sprintf("%x%x%d", serviceNameHash, operationNameHash, spanPoint.Timestamp)
}
// fuelDurationSamplePoint records duration sample points into values under
// keys "d:<duration>", with "traceID:spanID" (hex) as the cell content.
func fuelDurationSamplePoint(values map[string][]byte, points ...model.SamplePoint) map[string][]byte {
	for _, p := range points {
		cell := fmt.Sprintf("%x:%x", p.TraceID, p.SpanID)
		values["d:"+strconv.FormatInt(p.Value, 10)] = []byte(cell)
	}
	return values
}
// fuelErrrorSamplePoint records error sample points into values under keys
// "e:<value>", with "traceID:spanID" (hex) as the cell content.
// (The "Errror" misspelling is kept: callers reference this name.)
func fuelErrrorSamplePoint(values map[string][]byte, points ...model.SamplePoint) map[string][]byte {
	for _, p := range points {
		cell := fmt.Sprintf("%x:%x", p.TraceID, p.SpanID)
		values["e:"+strconv.FormatInt(p.Value, 10)] = []byte(cell)
	}
	return values
}
// toInfluxDBPoint converts a span point into an InfluxDB point: identity
// goes into tags, the three duration samples and the error count into fields,
// and the point is stamped with the span point's unix-second timestamp.
func toInfluxDBPoint(spanPoint *model.SpanPoint) (*influxdb.Point, error) {
	tags := map[string]string{
		serviceNameTag:   spanPoint.ServiceName,
		operationNameTag: spanPoint.OperationName,
		spanKindTag:      spanPoint.SpanKind,
		peerServiceTag:   spanPoint.PeerService,
	}
	fields := map[string]interface{}{
		maxDurationField: spanPoint.MaxDuration.Value,
		minDurationField: spanPoint.MinDuration.Value,
		avgDurationField: spanPoint.AvgDuration.Value,
		errorsField:      len(spanPoint.Errors),
	}
	return influxdb.NewPoint(spanpointMeasurement, tags, fields, time.Unix(spanPoint.Timestamp, 0))
}

View File

@@ -0,0 +1,144 @@
package dao
import (
"context"
"flag"
"log"
"math/rand"
"os"
"strconv"
"testing"
"time"
. "github.com/smartystreets/goconvey/convey"
"go-common/app/service/main/dapper/conf"
"go-common/app/service/main/dapper/model"
)
// init seeds math/rand so each test run writes distinct random row keys.
func init() {
	rand.Seed(time.Now().UnixNano())
}
// cfg is the config shared by all tests; nil when loading failed (tests skip).
var cfg *conf.Config

// flagMap holds the config-center flags injected before flag.Parse in TestMain.
var flagMap = map[string]string{
	"app_id":       "main.common-arch.dapper-service",
	"conf_token":   "528dd7e00bb411e894c14a552f48fef8",
	"tree_id":      "5172",
	"conf_version": "server-1",
	"deploy_env":   "uat",
	"conf_host":    "config.bilibili.co",
	"conf_path":    os.TempDir(),
	"region":       "sh",
	"zone":         "sh001",
}
// TestMain wires the config-center flags, loads config (best-effort) and
// applies unit-test overrides (dedicated namespace/database) before running
// the suite. Environment variables TEST_HBASE_ADDRS / TEST_INFLUXDB_ADDR
// override the loaded endpoints.
func TestMain(m *testing.M) {
	for key, val := range flagMap {
		flag.Set(key, val)
	}
	flag.Parse()
	if err := conf.Init(); err != nil {
		log.Printf("init config from remote error: %s", err)
	}
	cfg = conf.Conf
	// guard: when conf.Init fails, conf.Conf may be nil; the original code
	// dereferenced cfg unconditionally and panicked before TestDao could skip.
	if cfg != nil {
		if cfg.InfluxDB != nil {
			cfg.InfluxDB.Database = "dapper_ut"
		}
		if cfg.HBase != nil {
			cfg.HBase.Namespace = "dapperut"
		}
	}
	if hbaseAddrs := os.Getenv("TEST_HBASE_ADDRS"); hbaseAddrs != "" {
		if cfg == nil {
			cfg = &conf.Config{}
		}
		cfg.HBase = &conf.HBaseConfig{Addrs: hbaseAddrs, Namespace: "dapperut"}
		if influxdbAddr := os.Getenv("TEST_INFLUXDB_ADDR"); influxdbAddr != "" {
			cfg.InfluxDB = &conf.InfluxDBConfig{Addr: influxdbAddr, Database: "dapper_ut"}
		}
	}
	os.Exit(m.Run())
}
// TestDao exercises the dao against real HBase/InfluxDB backends; it is
// skipped when TestMain could not load any configuration.
func TestDao(t *testing.T) {
	if cfg == nil {
		t.Skipf("no config provide skipped")
	}
	daoImpl, err := New(cfg)
	if err != nil {
		t.Fatalf("new dao error: %s", err)
	}
	ctx := context.Background()
	// read path: list services, then each service's operation names.
	Convey("test fetch serviceName and operationName", t, func() {
		serviceNames, err := daoImpl.FetchServiceName(ctx)
		So(err, ShouldBeNil)
		So(serviceNames, ShouldNotBeEmpty)
		for _, serviceName := range serviceNames {
			operationNames, err := daoImpl.FetchOperationName(ctx, serviceName)
			So(err, ShouldBeNil)
			t.Logf("%s operationNames: %v", serviceName, operationNames)
		}
	})
	// write path: a raw-trace row keyed by a random hex value.
	Convey("test write rawtrace", t, func() {
		if err := daoImpl.WriteRawTrace(
			context.Background(),
			strconv.FormatUint(rand.Uint64(), 16),
			map[string][]byte{strconv.FormatUint(rand.Uint64(), 16): []byte("hello world")},
		); err != nil {
			t.Error(err)
		}
	})
	// write path: span points both with and without duration/error samples.
	Convey("test batchwrite span point", t, func() {
		points := []*model.SpanPoint{
			&model.SpanPoint{
				ServiceName:   "service_a",
				OperationName: "opt1",
				PeerService:   "peer_service_a",
				SpanKind:      "client",
				Timestamp:     time.Now().Unix() - rand.Int63n(3600),
				MaxDuration: model.SamplePoint{
					SpanID:  rand.Uint64(),
					TraceID: rand.Uint64(),
					Value:   rand.Int63n(1024),
				},
				MinDuration: model.SamplePoint{
					SpanID:  rand.Uint64(),
					TraceID: rand.Uint64(),
					Value:   rand.Int63n(1024),
				},
				AvgDuration: model.SamplePoint{
					SpanID:  rand.Uint64(),
					TraceID: rand.Uint64(),
					Value:   rand.Int63n(1024),
				},
				Errors: []model.SamplePoint{
					model.SamplePoint{
						SpanID:  rand.Uint64(),
						TraceID: rand.Uint64(),
						Value:   1,
					},
					model.SamplePoint{
						SpanID:  rand.Uint64(),
						TraceID: rand.Uint64(),
						Value:   1,
					},
				},
			},
			&model.SpanPoint{
				ServiceName:   "service_b",
				OperationName: "opt2",
				PeerService:   "peer_service_b",
				SpanKind:      "server",
				Timestamp:     time.Now().Unix() - rand.Int63n(3600),
			},
			&model.SpanPoint{
				ServiceName:   "service_c",
				OperationName: "opt3",
				PeerService:   "peer_service_c",
				SpanKind:      "client",
				Timestamp:     time.Now().Unix() - rand.Int63n(3600),
			},
		}
		err := daoImpl.BatchWriteSpanPoint(context.Background(), points)
		if err != nil {
			t.Error(err)
		}
	})
}

View File

@@ -0,0 +1,4 @@
#!/bin/bash
# Test-environment endpoints consumed by dao_test.go (TestMain reads these
# env vars to point the unit tests at a live InfluxDB/HBase).
export TEST_INFLUXDB_ADDR=http://172.22.33.146:8086
export TEST_HBASE_ADDRS=172.18.33.131,172.18.33.168,172.18.33.169

View File

@@ -0,0 +1,39 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = [
"response.go",
"span.go",
"spanpoint.go",
"util.go",
],
importpath = "go-common/app/service/main/dapper/model",
tags = ["manual"],
visibility = ["//visibility:public"],
deps = [
"//library/net/trace/proto:go_default_library",
"@com_github_golang_protobuf//proto:go_default_library",
"@io_bazel_rules_go//proto/wkt:duration_go_proto",
"@io_bazel_rules_go//proto/wkt:timestamp_go_proto",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,15 @@
package model
// ClientStatusResp is the clientstatus debug response: the pending queue
// length plus per-client status entries.
type ClientStatusResp struct {
	QueueLen int             `json:"queue_len"`
	Clients  []*ClientStatus `json:"clients"`
}

// ClientStatus describes one connected client: address, connect time,
// accumulated error count and current rate.
type ClientStatus struct {
	Addr     string `json:"addr"`
	UpTime   int64  `json:"up_time"`
	ErrCount int64  `json:"err_count"`
	Rate     int64  `json:"rate"`
}

View File

@@ -0,0 +1,142 @@
package model
import (
"strconv"
"time"
"github.com/golang/protobuf/proto"
protogen "go-common/library/net/trace/proto"
)
// ProtoSpan is an alias of go-common/library/net/trace/proto.Span.
type ProtoSpan protogen.Span

// Span reference kinds.
const (
	RefTypeChildOf int8 = iota
	RefTypeFollowsFrom
)

// Tag value kinds.
const (
	TagString int8 = iota
	TagInt
	TagBool
	TagFloat
)
// SpanRef describes causal relationship of the current span to another span (e.g. 'child-of')
type SpanRef struct {
	RefType int8
	TraceID uint64
	SpanID  uint64
}

// Tag is one decoded span tag (Kind is one of the Tag* constants above).
type Tag struct {
	Kind  int8
	Key   string
	Value interface{}
}

// Field is one key/value entry inside a span log record.
type Field struct {
	Key   string
	Value []byte
}

// Log is one timestamped span log record.
type Log struct {
	Timestamp int64
	Fields    []Field
}

// Span represents a named unit of work performed by a service. It keeps the
// decoded view alongside the original protobuf span (ProtoSpan) so mutations
// can be mirrored into both.
type Span struct {
	ServiceName   string
	OperationName string
	TraceID       uint64
	SpanID        uint64
	ParentID      uint64
	Env           string
	StartTime     time.Time
	Duration      time.Duration
	References    []SpanRef
	Tags          map[string]interface{}
	Logs          []Log
	ProtoSpan     *ProtoSpan
}
// SetTag attaches a tag to the span, mirroring it into both the decoded Tags
// map and the underlying protobuf span. It returns an error when the value
// type is unsupported by toProtoTag.
func (s *Span) SetTag(key string, value interface{}) error {
	ptag, err := toProtoTag(key, value)
	if err != nil {
		return err
	}
	// guard: spans built via struct literal may have a nil Tags map; the
	// original code panicked on assignment in that case.
	if s.Tags == nil {
		s.Tags = make(map[string]interface{})
	}
	s.Tags[key] = value
	s.ProtoSpan.Tags = append(s.ProtoSpan.Tags, ptag)
	return nil
}
// SetOperationName renames the span, keeping the decoded view and the
// protobuf span in sync.
func (s *Span) SetOperationName(operationName string) {
	s.OperationName = operationName
	s.ProtoSpan.OperationName = operationName
}
// TraceIDStr returns the trace id in lowercase hex (no padding).
func (s *Span) TraceIDStr() string {
	return strconv.FormatUint(s.TraceID, 16)
}

// SpanIDStr returns the span id in lowercase hex (no padding).
func (s *Span) SpanIDStr() string {
	return strconv.FormatUint(s.SpanID, 16)
}

// ParentIDStr returns the parent span id in lowercase hex (no padding).
func (s *Span) ParentIDStr() string {
	return strconv.FormatUint(s.ParentID, 16)
}
// IsServer reports whether the span carries the tag span.kind == "server";
// a missing or non-string tag counts as not-server.
func (s *Span) IsServer() bool {
	if kind, ok := s.Tags["span.kind"].(string); ok {
		return kind == "server"
	}
	return false
}
// IsError reports whether the span carries a boolean "error" tag set to true;
// a missing or non-bool tag counts as no error.
func (s *Span) IsError() bool {
	if v, ok := s.Tags["error"].(bool); ok {
		return v
	}
	return false
}
// StringTag returns the tag value as a string ("" when missing or not a string).
func (s *Span) StringTag(key string) string {
	val, _ := s.Tags[key].(string)
	return val
}

// BoolTag returns the tag value as a bool (false when missing or not a bool).
func (s *Span) BoolTag(key string) bool {
	val, _ := s.Tags[key].(bool)
	return val
}

// GetTagString is identical to StringTag.
// NOTE(review): duplicate of StringTag — consider deprecating one of them.
func (s *Span) GetTagString(key string) string {
	val, _ := s.Tags[key].(string)
	return val
}
// Marshal serializes the underlying protobuf span; it returns (nil, nil)
// when the span has no protobuf representation attached.
func (s *Span) Marshal() ([]byte, error) {
	if s.ProtoSpan == nil {
		return nil, nil
	}
	return proto.Marshal((*protogen.Span)(s.ProtoSpan))
}

View File

@@ -0,0 +1,24 @@
package model
// const for SpanPoint (placeholder, intentionally empty for now).
const ()

// SamplePoint is a single sampled span reference: which trace/span produced
// the sampled Value (a duration or an error count).
type SamplePoint struct {
	TraceID uint64
	SpanID  uint64
	Value   int64
}

// SpanPoint is one time-series data point aggregated per
// (service, operation, peer, kind) at a unix-second Timestamp.
type SpanPoint struct {
	Timestamp     int64
	ServiceName   string
	OperationName string
	PeerService   string
	SpanKind      string
	AvgDuration   SamplePoint // random sample point
	MaxDuration   SamplePoint
	MinDuration   SamplePoint
	Errors        []SamplePoint
}

View File

@@ -0,0 +1,249 @@
package model
import (
"encoding/binary"
"fmt"
"math"
"strconv"
"time"
"github.com/golang/protobuf/ptypes/duration"
"github.com/golang/protobuf/ptypes/timestamp"
protogen "go-common/library/net/trace/proto"
)
// protoVersion2 marks spans produced by the v2 SDK wire format.
const protoVersion2 int32 = 2

// FromProtoSpan converts a protobuf span into a model.Span, dispatching on
// the wire version (legacy vs v2). Logs are only decoded when parseLog is
// true. On success the original protobuf span is kept on the result.
func FromProtoSpan(protoSpan *ProtoSpan, parseLog bool) (*Span, error) {
	var span *Span
	var err error
	if protoSpan.Version != protoVersion2 {
		span, err = fromProtoSpanLeagcy(protoSpan, parseLog)
	} else {
		span, err = fromProtoSpanInternal(protoSpan, parseLog)
	}
	if err == nil {
		// NOTE: !!
		span.ProtoSpan = protoSpan
	}
	return span, err
}
// convertLeagcyTag decodes a legacy-format tag, whose value is always the
// string form of the payload ("Leagcy" misspelling kept — name is referenced
// throughout this file). Parse failures leave the zero value.
func convertLeagcyTag(protoTag *protogen.Tag) Tag {
	tag := Tag{Key: protoTag.Key}
	switch protoTag.Kind {
	case protogen.Tag_STRING:
		tag.Kind = TagString
		tag.Value = string(protoTag.Value)
	case protogen.Tag_INT:
		tag.Kind = TagInt
		tag.Value, _ = strconv.ParseInt(string(protoTag.Value), 10, 64)
	case protogen.Tag_BOOL:
		tag.Kind = TagBool
		tag.Value, _ = strconv.ParseBool(string(protoTag.Value))
	case protogen.Tag_FLOAT:
		tag.Kind = TagFloat
		tag.Value, _ = strconv.ParseFloat(string(protoTag.Value), 64)
	}
	return tag
}
// convertLeagcyLog converts a legacy log record, which carries exactly one
// key/value pair, into a Log with a single Field.
func convertLeagcyLog(protoLog *protogen.Log) Log {
	log := Log{Timestamp: protoLog.Timestamp}
	log.Fields = []Field{{Key: protoLog.Key, Value: protoLog.Value}}
	return log
}
// fromProtoSpanLeagcy decodes a legacy (pre-v2) span: times are raw
// nanosecond integers, the only reference is the implicit child-of parent,
// and tags/logs use the string-encoded legacy formats.
func fromProtoSpanLeagcy(protoSpan *ProtoSpan, parseLog bool) (*Span, error) {
	span := &Span{
		ServiceName:   protoSpan.ServiceName,
		OperationName: protoSpan.OperationName,
		TraceID:       protoSpan.TraceId,
		SpanID:        protoSpan.SpanId,
		Env:           protoSpan.Env,
		ParentID:      protoSpan.ParentId,
	}
	// StartAt/FinishAt are unix nanoseconds in the legacy format.
	span.StartTime = time.Unix(protoSpan.StartAt/int64(time.Second), protoSpan.StartAt%int64(time.Second))
	span.Duration = time.Duration(protoSpan.FinishAt - protoSpan.StartAt)
	span.References = []SpanRef{{
		RefType: RefTypeChildOf,
		TraceID: protoSpan.TraceId,
		SpanID:  protoSpan.ParentId,
	}}
	span.Tags = make(map[string]interface{})
	for _, tag := range protoSpan.Tags {
		newTag := convertLeagcyTag(tag)
		span.Tags[newTag.Key] = newTag.Value
	}
	if !parseLog {
		return span, nil
	}
	span.Logs = make([]Log, 0, len(protoSpan.Logs))
	for _, log := range protoSpan.Logs {
		span.Logs = append(span.Logs, convertLeagcyLog(log))
	}
	return span, nil
}
// timeFromTimestamp converts a protobuf Timestamp into a time.Time.
func timeFromTimestamp(t *timestamp.Timestamp) time.Time {
	return time.Unix(t.Seconds, int64(t.Nanos))
}

// durationFromDuration converts a protobuf Duration into a time.Duration.
func durationFromDuration(d *duration.Duration) time.Duration {
	return time.Duration(d.Seconds*int64(time.Second) + int64(d.Nanos))
}
// convertSpanRef maps a protobuf span reference onto the model's SpanRef,
// translating the protobuf ref-type enum into the RefType* constants.
func convertSpanRef(protoRef *protogen.SpanRef) SpanRef {
	ref := SpanRef{
		TraceID: protoRef.TraceId,
		SpanID:  protoRef.SpanId,
	}
	switch protoRef.RefType {
	case protogen.SpanRef_CHILD_OF:
		ref.RefType = RefTypeChildOf
	case protogen.SpanRef_FOLLOWS_FROM:
		ref.RefType = RefTypeFollowsFrom
	}
	return ref
}
// unSerializeInt64 decodes an 8-byte big-endian payload into an int64
// (inverse of serializeInt64).
func unSerializeInt64(data []byte) int64 {
	bits := binary.BigEndian.Uint64(data)
	return int64(bits)
}
// unSerializeBool decodes a single-byte payload; exactly 1 means true
// (inverse of serializeBool).
func unSerializeBool(data []byte) bool {
	const trueByte byte = 1
	return data[0] == trueByte
}
// unSerializeFloat64 decodes an 8-byte big-endian IEEE-754 payload into a
// float64 (inverse of serializeFloat64).
func unSerializeFloat64(data []byte) float64 {
	return math.Float64frombits(binary.BigEndian.Uint64(data))
}
// convertTag decodes a v2 tag, whose value is binary-encoded (big-endian
// int64/float64, single-byte bool) rather than the legacy string form.
func convertTag(protoTag *protogen.Tag) Tag {
	tag := Tag{Key: protoTag.Key}
	switch protoTag.Kind {
	case protogen.Tag_STRING:
		tag.Kind = TagString
		tag.Value = string(protoTag.Value)
	case protogen.Tag_INT:
		tag.Kind = TagInt
		tag.Value = unSerializeInt64(protoTag.Value)
	case protogen.Tag_BOOL:
		tag.Kind = TagBool
		tag.Value = unSerializeBool(protoTag.Value)
	case protogen.Tag_FLOAT:
		tag.Kind = TagFloat
		tag.Value = unSerializeFloat64(protoTag.Value)
	}
	return tag
}
// convertLog converts a v2 log record, copying all of its key/value fields.
func convertLog(protoLog *protogen.Log) Log {
	log := Log{Timestamp: protoLog.Timestamp}
	log.Fields = make([]Field, 0, len(protoLog.Fields))
	for _, protoFiled := range protoLog.Fields {
		log.Fields = append(log.Fields, Field{Key: protoFiled.Key, Value: protoFiled.Value})
	}
	return log
}
// fromProtoSpanInternal decodes a v2 span: protobuf Timestamp/Duration for
// times, explicit reference list, and binary-encoded tag values.
func fromProtoSpanInternal(protoSpan *ProtoSpan, parseLog bool) (*Span, error) {
	span := &Span{
		ServiceName:   protoSpan.ServiceName,
		OperationName: protoSpan.OperationName,
		TraceID:       protoSpan.TraceId,
		SpanID:        protoSpan.SpanId,
		ParentID:      protoSpan.ParentId,
		Env:           protoSpan.Env,
		StartTime:     timeFromTimestamp(protoSpan.StartTime),
		Duration:      durationFromDuration(protoSpan.Duration),
	}
	span.References = make([]SpanRef, 0, len(protoSpan.References))
	for _, ref := range protoSpan.References {
		span.References = append(span.References, convertSpanRef(ref))
	}
	span.Tags = make(map[string]interface{})
	for _, tag := range protoSpan.Tags {
		newTag := convertTag(tag)
		span.Tags[newTag.Key] = newTag.Value
	}
	if !parseLog {
		return span, nil
	}
	span.Logs = make([]Log, 0, len(protoSpan.Logs))
	for _, log := range protoSpan.Logs {
		span.Logs = append(span.Logs, convertLog(log))
	}
	return span, nil
}
// ParseProtoSpanTag decodes only the tags of a protobuf span into a map,
// choosing the v2 or legacy tag decoder based on the span's wire version.
func ParseProtoSpanTag(protoSpan *protogen.Span) map[string]interface{} {
	tagMap := make(map[string]interface{})
	var convertFn func(*protogen.Tag) Tag
	if protoSpan.Version == protoVersion2 {
		convertFn = convertTag
	} else {
		convertFn = convertLeagcyTag
	}
	for _, protoTag := range protoSpan.Tags {
		tag := convertFn(protoTag)
		tagMap[tag.Key] = tag.Value
	}
	return tagMap
}
// serializeInt64 encodes v as 8 big-endian bytes (inverse of unSerializeInt64).
func serializeInt64(v int64) []byte {
	buf := make([]byte, 8)
	binary.BigEndian.PutUint64(buf, uint64(v))
	return buf
}
// serializeFloat64 encodes v's IEEE-754 bits as 8 big-endian bytes
// (inverse of unSerializeFloat64).
func serializeFloat64(v float64) []byte {
	buf := make([]byte, 8)
	binary.BigEndian.PutUint64(buf, math.Float64bits(v))
	return buf
}
// serializeBool encodes v as a single byte: 1 for true, 0 for false
// (inverse of unSerializeBool).
func serializeBool(v bool) []byte {
	b := byte(0)
	if v {
		b = 1
	}
	return []byte{b}
}
// toProtoTag converts a key/value pair into a protogen.Tag, choosing the wire
// kind and binary serialization from the Go dynamic type of value.
// Supported: string, int/int32/int64, bool, float32/float64; anything else
// yields an error.
func toProtoTag(key string, value interface{}) (*protogen.Tag, error) {
	ptag := &protogen.Tag{Key: key}
	switch value := value.(type) {
	case string:
		ptag.Kind = protogen.Tag_STRING
		ptag.Value = []byte(value)
	case int:
		ptag.Kind = protogen.Tag_INT
		ptag.Value = serializeInt64(int64(value))
	case int32:
		ptag.Kind = protogen.Tag_INT
		ptag.Value = serializeInt64(int64(value))
	case int64:
		ptag.Kind = protogen.Tag_INT
		ptag.Value = serializeInt64(value)
	case bool:
		ptag.Kind = protogen.Tag_BOOL
		ptag.Value = serializeBool(value)
	case float32:
		// fix: floats were labeled Tag_BOOL, so convertTag decoded the 8-byte
		// float payload with unSerializeBool on the read path.
		ptag.Kind = protogen.Tag_FLOAT
		ptag.Value = serializeFloat64(float64(value))
	case float64:
		ptag.Kind = protogen.Tag_FLOAT
		ptag.Value = serializeFloat64(value)
	default:
		return nil, fmt.Errorf("invalid tag type %T", value)
	}
	return ptag, nil
}

View File

@@ -0,0 +1,43 @@
# Bazel build rules for the batchwrite package (tags "automanaged": kept in
# sync by tooling — regenerate rather than hand-editing deps).
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_test",
"go_library",
)
go_test(
name = "go_default_test",
srcs = ["batchwrite_test.go"],
embed = [":go_default_library"],
rundir = ".",
tags = ["automanaged"],
deps = ["//app/service/main/dapper/model:go_default_library"],
)
go_library(
name = "go_default_library",
srcs = ["batchwrite.go"],
importpath = "go-common/app/service/main/dapper/pkg/batchwrite",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/dapper/model:go_default_library",
"//library/log:go_default_library",
"//vendor/github.com/pkg/errors:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,174 @@
package batchwrite
import (
"context"
"sync"
"time"
"github.com/pkg/errors"
"go-common/app/service/main/dapper/model"
"go-common/library/log"
)
var (
	// _writeTimeout bounds how long flushBufMap waits for channel space.
	_writeTimeout = time.Second
	// ErrClosed is returned by WriteSpan after Close has been called.
	ErrClosed = errors.New("batchwriter already closed")
)
// BatchWriter buffers spans and flushes them in batches via worker goroutines.
type BatchWriter interface {
	WriteSpan(span *model.Span) error
	Close() error
	// QueueLen reports the internal queue length (for monitoring/debugging).
	QueueLen() int
}
// rawBundle is one flush unit: all buffered span cells of a single trace
// (key is the trace id).
type rawBundle struct {
	key  string
	data map[string][]byte
}
// NewRawDataBatchWriter builds a BatchWriter that groups spans by trace id,
// flushes when the buffered payload exceeds bufSize or every interval, and
// delivers bundles to writeFunc on `workers` goroutines.
// Defaults: workers < 1 -> 1; interval <= 0 -> 5s; per-write timeout is 10s.
func NewRawDataBatchWriter(writeFunc func(context.Context, string, map[string][]byte) error, bufSize, chanSize, workers int, interval time.Duration) BatchWriter {
	if workers <= 0 {
		workers = 1
	}
	if interval <= 0 {
		interval = 5 * time.Second
	}
	rbw := &rawDataBatchWrite{
		maxBufSize: bufSize,
		ch:         make(chan *rawBundle, chanSize),
		bufMap:     make(map[string]map[string][]byte),
		timeout:    10 * time.Second,
		writeFunc:  writeFunc,
	}
	rbw.wg.Add(workers)
	for i := 0; i < workers; i++ {
		go rbw.worker()
	}
	rbw.flushTicker = time.NewTicker(interval)
	go rbw.daemonFlush()
	return rbw
}
// rawDataBatchWrite is the BatchWriter implementation; mx guards closed,
// sizeCount and bufMap (trace id -> span cell key -> serialized span).
type rawDataBatchWrite struct {
	mx          sync.Mutex
	closed      bool
	maxBufSize  int
	sizeCount   int
	bufMap      map[string]map[string][]byte
	ch          chan *rawBundle
	timeout     time.Duration
	writeFunc   func(context.Context, string, map[string][]byte) error
	wg          sync.WaitGroup
	flushTicker *time.Ticker
}
// WriteSpan serializes the span and buffers it under its trace id; when the
// accumulated payload already exceeds maxBufSize, the full buffer is swapped
// out under the lock and flushed to the worker channel outside it.
// Cell key layout: "<spanID hex>_s" for server spans, "<spanID hex>_c" otherwise.
func (r *rawDataBatchWrite) WriteSpan(span *model.Span) error {
	data, err := span.Marshal()
	if err != nil {
		return err
	}
	traceID := span.TraceIDStr()
	spanID := span.SpanIDStr()
	kind := "_s"
	if !span.IsServer() {
		kind = "_c"
	}
	key := spanID + kind
	var bufMap map[string]map[string][]byte
	r.mx.Lock()
	// swap the buffer out BEFORE adding the new span when it is already full.
	if r.sizeCount > r.maxBufSize {
		bufMap = r.bufMap
		r.bufMap = make(map[string]map[string][]byte)
		r.sizeCount = 0
	}
	r.sizeCount += len(data)
	if _, ok := r.bufMap[traceID]; !ok {
		r.bufMap[traceID] = make(map[string][]byte)
	}
	r.bufMap[traceID][key] = data
	closed := r.closed
	r.mx.Unlock()
	if closed {
		// NOTE(review): the span was already buffered before this check, and a
		// concurrent swap above could send on the closed channel — confirm
		// WriteSpan is never raced with Close in practice.
		return ErrClosed
	}
	if bufMap != nil {
		return r.flushBufMap(bufMap)
	}
	return nil
}
// QueueLen reports how many bundles are currently waiting in the worker channel.
func (r *rawDataBatchWrite) QueueLen() int {
	pending := len(r.ch)
	return pending
}
// daemonFlush periodically flushes the buffer; it exits when the ticker is
// stopped (by Close) and its channel stops delivering.
func (r *rawDataBatchWrite) daemonFlush() {
	for range r.flushTicker.C {
		if err := r.flush(); err != nil {
			log.Error("flush raw data error: %s", err)
		}
	}
}
// flush swaps the current buffer out under the lock and, when it holds any
// data, pushes the swapped-out bundles onto the worker channel.
func (r *rawDataBatchWrite) flush() error {
	r.mx.Lock()
	if r.sizeCount == 0 {
		r.mx.Unlock()
		return nil
	}
	pending := r.bufMap
	r.bufMap = make(map[string]map[string][]byte)
	r.sizeCount = 0
	r.mx.Unlock()
	return r.flushBufMap(pending)
}
// flushBufMap pushes one bundle per trace onto the worker channel. A single
// shared timer bounds the total time spent waiting for channel space; when it
// fires before all bundles are queued, the remainder is dropped with an error.
func (r *rawDataBatchWrite) flushBufMap(bufMap map[string]map[string][]byte) error {
	timer := time.NewTimer(_writeTimeout)
	// fix: the timer was never stopped, leaking it until expiry on every
	// successful flush.
	defer timer.Stop()
	for traceID, data := range bufMap {
		select {
		case <-timer.C:
			return errors.New("write span timeout, raw data buffer channel is full")
		case r.ch <- &rawBundle{
			key:  traceID,
			data: data,
		}:
		}
	}
	return nil
}
// Close marks the writer closed, stops the flush ticker, drains the remaining
// buffer to the workers, closes the channel and waits for the workers to exit.
// It returns the final flush error (previously swallowed); calling Close a
// second time returns ErrClosed instead of panicking on close(r.ch).
func (r *rawDataBatchWrite) Close() error {
	r.mx.Lock()
	if r.closed {
		// fix: a second Close used to close the channel again and panic.
		r.mx.Unlock()
		return ErrClosed
	}
	r.closed = true
	r.flushTicker.Stop()
	bufMap := r.bufMap
	r.bufMap = make(map[string]map[string][]byte)
	r.sizeCount = 0
	// flush outside the lock so WriteSpan callers are not blocked for the
	// whole drain; they observe closed=true and return ErrClosed.
	r.mx.Unlock()
	err := r.flushBufMap(bufMap) // fix: this error was silently dropped
	close(r.ch)
	r.wg.Wait()
	return err
}
// worker drains bundles from the channel, writing each one and logging (not
// propagating) failures; it signals the WaitGroup once the channel is closed.
func (r *rawDataBatchWrite) worker() {
	for b := range r.ch {
		err := r.write(b)
		if err != nil {
			log.Error("batch write raw data error: %s", err)
		}
	}
	r.wg.Done()
}
// write delivers one bundle through writeFunc, bounded by the configured
// per-write timeout.
func (r *rawDataBatchWrite) write(b *rawBundle) error {
	ctx, cancel := context.WithTimeout(context.Background(), r.timeout)
	defer cancel()
	return r.writeFunc(ctx, b.key, b.data)
}

View File

@@ -0,0 +1,89 @@
package batchwrite
import (
	"context"
	"math/rand"
	"sync"
	"testing"

	"go-common/app/service/main/dapper/model"
)
var (
	// emptyspan is a zero-value span used by the closed-writer test.
	emptyspan = &model.Span{}
)
// TestRawDataBatchWriter checks that spans are grouped by trace id and all
// reach the write function after Close.
func TestRawDataBatchWriter(t *testing.T) {
	// fix: the writer runs 2 worker goroutines, so writeFunc executes
	// concurrently — the storage map must be mutex-guarded (data race before).
	var mu sync.Mutex
	storage := make(map[string]map[string][]byte)
	writeFunc := func(ctx context.Context, traceID string, data map[string][]byte) error {
		mu.Lock()
		defer mu.Unlock()
		if _, ok := storage[traceID]; !ok {
			storage[traceID] = make(map[string][]byte)
		}
		for k, v := range data {
			storage[traceID][k] = v
		}
		return nil
	}
	rbw := NewRawDataBatchWriter(writeFunc, 16, 2, 2, 0)
	spans := []*model.Span{
		&model.Span{
			TraceID: 1,
			SpanID:  11,
		},
		&model.Span{
			TraceID: 1,
			SpanID:  12,
		},
		&model.Span{
			TraceID: 2,
			SpanID:  21,
		},
		&model.Span{
			TraceID: 2,
			SpanID:  22,
		},
	}
	for _, span := range spans {
		if err := rbw.WriteSpan(span); err != nil {
			t.Error(err)
		}
	}
	// Close drains the buffer and waits for workers, so storage is complete
	// (and no longer concurrently written) after it returns.
	rbw.Close()
	if len(storage) != 2 {
		t.Errorf("expect get 2 trace data, get %v", storage)
	}
	if len(storage["1"]) != 2 {
		t.Errorf("expect get 2 span data, get %v", storage["1"])
	}
	t.Logf("%v\n", storage)
}
// TestBatchWriterClosed verifies WriteSpan returns ErrClosed after Close.
func TestBatchWriterClosed(t *testing.T) {
	writeFunc2 := func(ctx context.Context, traceID string, data map[string][]byte) error {
		return nil
	}
	rbw := NewRawDataBatchWriter(writeFunc2, 1024*1024, 2, 2, 0)
	rbw.Close()
	if err := rbw.WriteSpan(emptyspan); err != ErrClosed {
		t.Errorf("expect err == ErrClosed get: %v", err)
	}
}
// randSpan builds a span with small random trace/span ids so the benchmark
// exercises both new and existing trace buckets.
func randSpan() *model.Span {
	return &model.Span{
		TraceID: rand.Uint64() % 128,
		SpanID:  rand.Uint64() % 16,
	}
}
// BenchmarkRawDataWriter measures WriteSpan throughput with a no-op sink.
func BenchmarkRawDataWriter(b *testing.B) {
	writeFunc := func(ctx context.Context, traceID string, data map[string][]byte) error {
		return nil
	}
	rbw := NewRawDataBatchWriter(writeFunc, 1024*1024, 2, 2, 0)
	for i := 0; i < b.N; i++ {
		if err := rbw.WriteSpan(randSpan()); err != nil {
			b.Error(err)
		}
	}
	rbw.Close()
}

View File

@@ -0,0 +1,33 @@
# Bazel build rules for the collect package (tags "automanaged": kept in sync
# by tooling — regenerate rather than hand-editing deps).
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["collect.go"],
importpath = "go-common/app/service/main/dapper/pkg/collect",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = ["//app/service/main/dapper/pkg/process:go_default_library"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/main/dapper/pkg/collect/kafkacollect:all-srcs",
"//app/service/main/dapper/pkg/collect/tcpcollect:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,12 @@
package collect
import (
"go-common/app/service/main/dapper/pkg/process"
)
// Collecter collects spans from a source (kafka, tcp, ...) and feeds them to
// the registered processors. ("Collecter" spelling kept: implementations
// across packages reference this name.)
type Collecter interface {
	Start() error
	RegisterProcess(p process.Processer)
	Close() error
}

View File

@@ -0,0 +1,49 @@
# Bazel build rules for the kafkacollect package (tags "automanaged": kept in
# sync by tooling — regenerate rather than hand-editing deps).
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_test",
"go_library",
)
go_test(
name = "go_default_test",
srcs = ["collect_test.go"],
embed = [":go_default_library"],
tags = ["automanaged"],
deps = [
"//app/service/main/dapper/model:go_default_library",
"//app/service/main/dapper/pkg/process:go_default_library",
"//library/log:go_default_library",
],
)
go_library(
name = "go_default_library",
srcs = ["collect.go"],
importpath = "go-common/app/service/main/dapper/pkg/collect/kafkacollect",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/dapper/model:go_default_library",
"//app/service/main/dapper/pkg/collect:go_default_library",
"//app/service/main/dapper/pkg/process:go_default_library",
"//library/log:go_default_library",
"//library/stat/prom:go_default_library",
"//vendor/github.com/Shopify/sarama:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,173 @@
package kafkacollect
import (
"context"
"encoding/json"
"fmt"
"sync"
"github.com/Shopify/sarama"
"go-common/app/service/main/dapper/model"
"go-common/app/service/main/dapper/pkg/collect"
"go-common/app/service/main/dapper/pkg/process"
"go-common/library/log"
"go-common/library/stat/prom"
)
var (
	// prometheus counters for consumed messages and unmarshal failures.
	collectCount    = prom.New().WithCounter("dapper_kafka_collect_count", []string{"name"})
	collectErrCount = prom.New().WithCounter("dapper_kafka_collect_err_count", []string{"name"})
)
// Option mutates the collector option set at construction time.
type Option func(*option)

// option holds the kafka consumer settings (group, topic, broker addrs).
type option struct {
	group string
	topic string
	addrs []string
}

// saramaConfig returns the sarama config to use; currently nil, which makes
// sarama fall back to its defaults.
func (o option) saramaConfig() *sarama.Config {
	return nil
}

// defaultOption is the base option set applied before user Options.
var defaultOption = option{
	group: "default",
}
//func NewConsumer(addrs []string, config *Config) (Consumer, error)

// New builds a kafka-backed Collecter for the given topic and broker addrs;
// no connection is made until Start. At least one addr is required.
func New(topic string, addrs []string, options ...Option) (collect.Collecter, error) {
	log.V(10).Info("new kafkacollect topic %s addrs: %v", topic, addrs)
	if len(addrs) == 0 {
		return nil, fmt.Errorf("kafka addrs required")
	}
	opt := defaultOption
	for _, fn := range options {
		fn(&opt)
	}
	opt.addrs = addrs
	opt.topic = topic
	clt := &kafkaCollect{opt: opt}
	return clt, nil
}
// kafkaCollect consumes every partition of one topic and fans decoded spans
// out to the registered processors; wg tracks the per-partition consumers.
type kafkaCollect struct {
	wg            sync.WaitGroup
	opt           option
	ps            []process.Processer
	consumers     []*consumer
	client        sarama.Client
	offsetManager sarama.OffsetManager
	baseConsumer  sarama.Consumer
}
// RegisterProcess appends a processor; call before Start (the slice is not
// guarded against concurrent mutation once consumers are running).
func (k *kafkaCollect) RegisterProcess(p process.Processer) {
	k.ps = append(k.ps, p)
}
// Start connects to the brokers, sets up the offset manager and the base
// consumer, then spawns one consumer goroutine per partition.
func (k *kafkaCollect) Start() error {
	var err error
	if k.client, err = sarama.NewClient(k.opt.addrs, k.opt.saramaConfig()); err != nil {
		return fmt.Errorf("new kafka client error: %s", err)
	}
	if k.offsetManager, err = sarama.NewOffsetManagerFromClient(k.opt.group, k.client); err != nil {
		return fmt.Errorf("new offset manager error: %s", err)
	}
	if k.baseConsumer, err = sarama.NewConsumerFromClient(k.client); err != nil {
		return fmt.Errorf("new kafka consumer error: %s", err)
	}
	// fix: the addrs placeholder previously printed the topic twice.
	log.Info("kafkacollect consumer from topic: %s addrs: %v", k.opt.topic, k.opt.addrs)
	return k.start()
}
// handler feeds one decoded span through every registered processor; a
// processor failure is logged and the span is skipped for that processor.
func (k *kafkaCollect) handler(protoSpan *model.ProtoSpan) {
	for _, p := range k.ps {
		if perr := p.Process(context.Background(), protoSpan); perr != nil {
			log.Error("process span error: %s, discard", perr)
		}
	}
}
// start lists the topic's partitions and spawns one consumer per partition,
// resuming from the committed offset (or the oldest offset when none exists).
func (k *kafkaCollect) start() error {
	ps, err := k.client.Partitions(k.opt.topic)
	if err != nil {
		return fmt.Errorf("get partitions error: %s", err)
	}
	for _, p := range ps {
		var pom sarama.PartitionOffsetManager
		if pom, err = k.offsetManager.ManagePartition(k.opt.topic, p); err != nil {
			return fmt.Errorf("new manage partition error: %s", err)
		}
		offset, _ := pom.NextOffset()
		// -1 means no committed offset yet: start from the oldest message.
		if offset == -1 {
			offset = sarama.OffsetOldest
		}
		var c sarama.PartitionConsumer
		log.V(10).Info("partitions %d start offset %d", p, offset)
		if c, err = k.baseConsumer.ConsumePartition(k.opt.topic, p, offset); err != nil {
			return fmt.Errorf("new consume partition error: %s", err)
		}
		log.V(10).Info("start partition consumer partition: %d, offset: %d", p, offset)
		consumer := newConsumer(k, c, pom)
		k.consumers = append(k.consumers, consumer)
		k.wg.Add(1)
		go consumer.start()
	}
	return nil
}
func (k *kafkaCollect) Close() error {
for _, c := range k.consumers {
if err := c.close(); err != nil {
log.Warn("close consumer error: %s", err)
}
}
k.wg.Wait()
return nil
}
func newConsumer(k *kafkaCollect, c sarama.PartitionConsumer, pom sarama.PartitionOffsetManager) *consumer {
return &consumer{kafkaCollect: k, consumer: c, pom: pom, closeCh: make(chan struct{}, 1)}
}
type consumer struct {
*kafkaCollect
pom sarama.PartitionOffsetManager
consumer sarama.PartitionConsumer
closeCh chan struct{}
}
func (c *consumer) close() error {
c.closeCh <- struct{}{}
c.pom.Close()
return c.consumer.Close()
}
// start is the per-partition consume loop: it marks the offset, decodes each
// message as a JSON ProtoSpan and hands it to the processors, until closeCh
// fires. Runs in its own goroutine; signals completion via the shared
// WaitGroup.
func (c *consumer) start() {
	defer c.wg.Done()
	var err error
	for {
		select {
		case msg := <-c.consumer.Messages():
			collectCount.Incr("count")
			// Mark before processing: at-most-once delivery, a crashed
			// processor will not see the message again.
			c.pom.MarkOffset(msg.Offset+1, "")
			// Bug fix: the format said "topic ... key ..." but the arguments
			// were passed as (Key, Topic); order them to match the verbs.
			log.V(10).Info("receive message from kafka topic: %s key: %s content: %s", msg.Topic, msg.Key, msg.Value)
			protoSpan := new(model.ProtoSpan)
			if err = json.Unmarshal(msg.Value, protoSpan); err != nil {
				collectErrCount.Incr("count_error")
				// Bug fix: the original logged a `value` variable that was
				// never assigned (always nil); log the actual payload.
				log.Error("unmarshal span from kafka error: %s, value: %v", err, msg.Value)
				continue
			}
			c.handler(protoSpan)
		case <-c.closeCh:
			log.V(10).Info("receive closed return")
			return
		}
	}
}

View File

@@ -0,0 +1,32 @@
package kafkacollect
import (
"context"
"flag"
"fmt"
"testing"
"time"
"go-common/app/service/main/dapper/model"
"go-common/app/service/main/dapper/pkg/process"
"go-common/library/log"
)
// TestKafkaCollect is an integration smoke test: it consumes from a real
// kafka cluster for one minute and prints each span it receives.
// NOTE(review): the broker addresses are hard-coded internal IPs, so this
// test only passes inside that network — consider a build tag or an env-var
// guard so it is skipped elsewhere.
func TestKafkaCollect(t *testing.T) {
	flag.Parse()
	log.Init(nil)
	clt, err := New("lancer_main_dapper_collector", []string{"172.18.33.163:9092", "172.18.33.164:9092", "172.18.33.165:9092"})
	if err != nil {
		t.Fatal(err)
	}
	// Mock processor: just print whatever arrives; no assertions are made.
	m := process.MockProcess(func(ctx context.Context, protoSpan *model.ProtoSpan) error {
		fmt.Printf("%v\n", protoSpan)
		return nil
	})
	clt.RegisterProcess(m)
	if err := clt.Start(); err != nil {
		t.Fatal(err)
	}
	defer clt.Close()
	// Let the consumer run for a while; there is no pass/fail criterion
	// beyond Start succeeding.
	time.Sleep(time.Minute)
}

View File

@@ -0,0 +1,54 @@
# Bazel BUILD file for the tcpcollect package (auto-managed by gazelle;
# the tags = ["automanaged"] entries mean edits may be overwritten).
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_test",
    "go_library",
)

# Unit tests for the tcp collector server.
go_test(
    name = "go_default_test",
    srcs = ["server_test.go"],
    embed = [":go_default_library"],
    rundir = ".",
    tags = ["automanaged"],
    deps = [
        "//app/service/main/dapper/conf:go_default_library",
        "//app/service/main/dapper/model:go_default_library",
        "//app/service/main/dapper/pkg/process:go_default_library",
    ],
)

# The tcpcollect library itself; tagged "manual" so it is only built when
# explicitly requested.
go_library(
    name = "go_default_library",
    srcs = ["server.go"],
    importpath = "go-common/app/service/main/dapper/pkg/collect/tcpcollect",
    tags = ["manual"],
    visibility = ["//visibility:public"],
    deps = [
        "//app/service/main/dapper/conf:go_default_library",
        "//app/service/main/dapper/model:go_default_library",
        "//app/service/main/dapper/pkg/process:go_default_library",
        "//library/log:go_default_library",
        "//library/net/trace/proto:go_default_library",
        "//library/stat/counter:go_default_library",
        "//library/stat/prom:go_default_library",
        "@com_github_golang_protobuf//proto:go_default_library",
        "@io_bazel_rules_go//proto/wkt:duration_go_proto",
        "@io_bazel_rules_go//proto/wkt:timestamp_go_proto",
    ],
)

# Source globs used by the repo-wide //...:all-srcs aggregation.
filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,256 @@
package tcpcollect
import (
"bufio"
"bytes"
"context"
"encoding/binary"
"net"
"strconv"
"sync"
"time"
"github.com/golang/protobuf/proto"
"github.com/golang/protobuf/ptypes/duration"
"github.com/golang/protobuf/ptypes/timestamp"
"go-common/app/service/main/dapper/conf"
"go-common/app/service/main/dapper/model"
"go-common/app/service/main/dapper/pkg/process"
"go-common/library/log"
protogen "go-common/library/net/trace/proto"
"go-common/library/stat/counter"
"go-common/library/stat/prom"
)
// Prometheus counters, labelled by the remote client host.
var (
	collectCount    = prom.New().WithCounter("dapper_collect_count", []string{"remote_host"})
	collectErrCount = prom.New().WithCounter("dapper_collect_err_count", []string{"remote_host"})
)

// Wire framing: 2-byte magic followed by a 4-byte big-endian payload length
// (6 bytes of header in total).
const (
	_magicSize  = 2
	_headerSize = 6
)

var (
	_magicBuf  = []byte{0xAC, 0xBE}
	_separator = []byte("\001") // field separator of the legacy text protocol
)

// ClientStatus agent client status
// ClientStatus tracks per-connection counters for one connected agent.
type ClientStatus struct {
	Addr         string          // remote "host:port" as reported by the connection
	Counter      counter.Counter // rolling rate of received packets
	ErrorCounter counter.Counter
	UpTime       int64 // unix seconds when the connection was accepted
}
// incr records one received packet for this client, bumping both the
// prometheus counters (labelled by client host) and the in-memory counters
// exposed via ClientStatus. iserr marks the packet as failed.
func (c *ClientStatus) incr(iserr bool) {
	if iserr {
		collectErrCount.Incr(c.ClientHost())
		// Bug fix: ErrorCounter was declared and exposed on ClientStatus but
		// never updated, so status reporting always showed zero errors.
		c.ErrorCounter.Add(1)
	}
	collectCount.Incr(c.ClientHost())
	c.Counter.Add(1)
}

// ClientHost extract from client addr
// ClientHost returns only the host part of Addr (empty on a malformed addr).
func (c *ClientStatus) ClientHost() string {
	host, _, _ := net.SplitHostPort(c.Addr)
	return host
}
// TCPCollect tcp server.
// TCPCollect accepts agent connections, extracts framed span packets and
// feeds them to the registered processors.
type TCPCollect struct {
	cfg       *conf.Collect
	lis       net.Listener
	clientMap map[string]*ClientStatus // keyed by remote addr; guarded by rmx
	rmx       sync.RWMutex
	ps        []process.Processer // downstream span processors
}

// New tcp server.
// New only builds the server; call Start to begin listening.
func New(cfg *conf.Collect) *TCPCollect {
	svr := &TCPCollect{
		cfg:       cfg,
		clientMap: make(map[string]*ClientStatus),
	}
	return svr
}

// RegisterProcess implement process.Processer
// RegisterProcess adds a processor; not safe to call concurrently with Start.
func (s *TCPCollect) RegisterProcess(p process.Processer) {
	s.ps = append(s.ps, p)
}

// addClient registers a connection's status under its remote address.
func (s *TCPCollect) addClient(cs *ClientStatus) {
	s.rmx.Lock()
	defer s.rmx.Unlock()
	s.clientMap[cs.Addr] = cs
}

// removeClient drops the status entry when a connection closes.
func (s *TCPCollect) removeClient(cs *ClientStatus) {
	s.rmx.Lock()
	defer s.rmx.Unlock()
	delete(s.clientMap, cs.Addr)
}

// ClientStatus ClientStatus
// ClientStatus returns a snapshot slice of all currently connected clients.
func (s *TCPCollect) ClientStatus() []*ClientStatus {
	s.rmx.RLock()
	defer s.rmx.RUnlock()
	css := make([]*ClientStatus, 0, len(s.clientMap))
	for _, cs := range s.clientMap {
		css = append(css, cs)
	}
	return css
}

// Start tcp server.
// Start begins listening and serves each accepted connection in its own
// goroutine. Temporary accept errors are retried after one second; any other
// accept error terminates the accept loop.
func (s *TCPCollect) Start() error {
	var err error
	if s.lis, err = net.Listen(s.cfg.Network, s.cfg.Addr); err != nil {
		return err
	}
	go func() {
		for {
			conn, err := s.lis.Accept()
			if err != nil {
				if netE, ok := err.(net.Error); ok && netE.Temporary() {
					log.Error("l.Accept() error(%v)", err)
					time.Sleep(time.Second)
					continue
				}
				// Non-temporary error (e.g. listener closed): stop accepting.
				return
			}
			go s.serveConn(conn)
		}
	}()
	log.Info("tcp server start addr:%s@%s", s.cfg.Network, s.cfg.Addr)
	return nil
}

// Close tcp server.
// Close stops the listener; in-flight connections are not force-closed here.
func (s *TCPCollect) Close() error {
	return s.lis.Close()
}
// serveConn reads framed packets from one agent connection until the
// connection errors. Packets containing >=16 \001-separated fields are the
// legacy text protocol and are converted via legacySpan; everything else is
// decoded directly as a protobuf Span. Each span is handed to all registered
// processors.
func (s *TCPCollect) serveConn(conn net.Conn) {
	log.Info("serverConn remoteIP:%s", conn.RemoteAddr().String())
	cs := &ClientStatus{
		Addr:         conn.RemoteAddr().String(),
		Counter:      counter.NewRolling(time.Second, 100),
		ErrorCounter: counter.NewGauge(),
		UpTime:       time.Now().Unix(),
	}
	s.addClient(cs)
	defer conn.Close()
	defer s.removeClient(cs)
	rd := bufio.NewReaderSize(conn, 65536)
	for {
		buf, err := s.tailPacket(rd)
		if err != nil {
			log.Error("s.tailPacket() remoteIP:%s error(%v)", conn.RemoteAddr().String(), err)
			cs.incr(true)
			return
		}
		if len(buf) == 0 {
			log.Error("s.tailPacket() is empty")
			cs.incr(true)
			continue
		}
		data := buf
		fields := bytes.Split(buf, _separator)
		if len(fields) >= 16 {
			if data, err = s.legacySpan(fields[2:]); err != nil {
				log.Error("convert legacy span error: %s", err)
				// Bug fix: failed conversions were previously not counted.
				cs.incr(true)
				continue
			}
		}
		protoSpan := new(protogen.Span)
		if err = proto.Unmarshal(data, protoSpan); err != nil {
			// Bug fix: arguments were swapped relative to the format verbs.
			log.Error("unmarshal data %s error: %s", data, err)
			cs.incr(true)
			continue
		}
		// Bug fix: the original logged the stale outer `err` (always nil here)
		// instead of the processor error, and the final cs.incr(err != nil)
		// could therefore never record a processing failure.
		var procErr error
		for _, p := range s.ps {
			if pe := p.Process(context.Background(), (*model.ProtoSpan)(protoSpan)); pe != nil {
				log.Error("process span %s error: %s", protoSpan, pe)
				procErr = pe
			}
		}
		cs.incr(procErr != nil)
	}
}
// tailPacket scans the stream for the next framed packet and returns its
// payload. Frame layout (written by the deliver package's warpData):
// 2-byte magic | 4-byte big-endian length | 2 padding bytes | payload,
// where the length field covers padding+payload. Bytes before a valid magic
// are discarded one at a time (resynchronization after corruption).
func (s *TCPCollect) tailPacket(rr *bufio.Reader) (res []byte, err error) {
	var buf []byte
	// peek magic
	for {
		if buf, err = rr.Peek(_magicSize); err != nil {
			return
		}
		if bytes.Equal(buf, _magicBuf) {
			break
		}
		rr.Discard(1)
	}
	// peek length
	if buf, err = rr.Peek(_headerSize); err != nil {
		return
	}
	// peek body
	packetLen := int(binary.BigEndian.Uint32(buf[_magicSize:_headerSize]))
	if buf, err = rr.Peek(_headerSize + packetLen); err != nil {
		return
	}
	// Skip header plus the 2 padding bytes counted inside packetLen.
	res = buf[_headerSize+_magicSize:]
	rr.Discard(packetLen + _headerSize)
	return
}

// startTime/endTime/traceID/spanID/parentID/event/level/class/sample/address/family/title/comment/caller/error
// legacySpan converts one legacy \001-separated text record (field layout
// above) into a marshalled protobuf Span. Parse errors on the numeric fields
// are deliberately ignored and default to zero.
// NOTE(review): start/finish appear to be nanosecond unix timestamps (they
// are split with time.Second) — confirm against the old SDK before changing.
func (s *TCPCollect) legacySpan(fields [][]byte) ([]byte, error) {
	startAt, _ := strconv.ParseInt(string(fields[0]), 10, 64)
	finishAt, _ := strconv.ParseInt(string(fields[1]), 10, 64)
	traceID, _ := strconv.ParseUint(string(fields[2]), 10, 64)
	spanID, _ := strconv.ParseUint(string(fields[3]), 10, 64)
	parentID, _ := strconv.ParseUint(string(fields[4]), 10, 64)
	event, _ := strconv.Atoi(string(fields[5]))
	// Two legacy layouts exist; a 14-field record lacks one of the middle
	// columns, shifting the string fields left by one.
	start := 8
	if len(fields) == 14 {
		start = 7
	}
	address := string(fields[start+1])
	family := string(fields[start+2])
	title := string(fields[start+3])
	comment := string(fields[start+4])
	caller := string(fields[start+5])
	errMsg := string(fields[start+6])
	span := &protogen.Span{Version: 2}
	span.ServiceName = family
	span.OperationName = title
	span.Caller = caller
	span.TraceId = traceID
	span.SpanId = spanID
	span.ParentId = parentID
	span.StartTime = &timestamp.Timestamp{
		Seconds: startAt / int64(time.Second),
		Nanos:   int32(startAt % int64(time.Second)),
	}
	d := finishAt - startAt
	span.Duration = &duration.Duration{
		Seconds: d / int64(time.Second),
		Nanos:   int32(d % int64(time.Second)),
	}
	// event 0 marks a client-side span; anything else is treated as server.
	if event == 0 {
		span.Tags = append(span.Tags, &protogen.Tag{Key: "span.kind", Kind: protogen.Tag_STRING, Value: []byte("client")})
	} else {
		span.Tags = append(span.Tags, &protogen.Tag{Key: "span.kind", Kind: protogen.Tag_STRING, Value: []byte("server")})
	}
	span.Tags = append(span.Tags, &protogen.Tag{Key: "legacy.address", Kind: protogen.Tag_STRING, Value: []byte(address)})
	span.Tags = append(span.Tags, &protogen.Tag{Key: "legacy.comment", Kind: protogen.Tag_STRING, Value: []byte(comment)})
	if errMsg != "" {
		span.Logs = append(span.Logs, &protogen.Log{Key: "legacy.error", Fields: []*protogen.Field{&protogen.Field{Key: "error", Value: []byte(errMsg)}}})
	}
	return proto.Marshal(span)
}

View File

@@ -0,0 +1,45 @@
package tcpcollect
import (
"context"
"io"
"net"
"os"
"testing"
"time"
"go-common/app/service/main/dapper/conf"
"go-common/app/service/main/dapper/model"
"go-common/app/service/main/dapper/pkg/process"
)
// TestCollect starts the TCP collector on localhost, replays a captured
// binary fixture (testdata/data.bin) into it, and verifies that at least one
// span reached the registered processor.
// NOTE(review): `count` is incremented from the server goroutine and read
// from the test goroutine without synchronization — only the 1s sleep keeps
// this from racing; `go test -race` may flag it.
func TestCollect(t *testing.T) {
	count := 0
	collect := New(&conf.Collect{Network: "tcp", Addr: "127.0.0.1:6190"})
	collect.RegisterProcess(process.MockProcess(func(context.Context, *model.ProtoSpan) error {
		count++
		return nil
	}))
	if err := collect.Start(); err != nil {
		t.Fatal(err)
	}
	fp, err := os.Open("testdata/data.bin")
	if err != nil {
		t.Fatal(err)
	}
	defer fp.Close()
	conn, err := net.Dial("tcp", "127.0.0.1:6190")
	if err != nil {
		t.Fatal(err)
	}
	defer conn.Close()
	// Stream the whole fixture to the collector.
	_, err = io.Copy(conn, fp)
	if err != nil {
		t.Error(err)
	}
	// Give the server goroutine time to parse and process the packets.
	time.Sleep(time.Second)
	if count <= 0 {
		t.Errorf("expect more than one span write")
	}
}

Binary file not shown.

View File

@@ -0,0 +1,38 @@
# Bazel BUILD file for the deliver package (gazelle-managed; "automanaged"
# targets may be regenerated).
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_test",
    "go_library",
)

# Unit tests for the TCP deliver client.
go_test(
    name = "go_default_test",
    srcs = ["deliver_test.go"],
    embed = [":go_default_library"],
    rundir = ".",
    tags = ["automanaged"],
)

# The deliver library: ships framed span payloads to dapper-service over TCP.
go_library(
    name = "go_default_library",
    srcs = ["deliver.go"],
    importpath = "go-common/app/service/main/dapper/pkg/deliver",
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
    deps = ["//library/log:go_default_library"],
)

# Source globs used by the repo-wide //...:all-srcs aggregation.
filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,155 @@
package deliver
import (
"encoding/binary"
"fmt"
"math/rand"
"net"
"sync"
"time"
"go-common/library/log"
)
var (
	// _magicBuf is the 2-byte frame magic; must match the collector side.
	_magicBuf = []byte{0xAC, 0xBE}
	// _bufpool recycles frame buffers built by warpData.
	_bufpool sync.Pool
)

func init() {
	rand.Seed(time.Now().UnixNano())
	_bufpool = sync.Pool{New: func() interface{} {
		return make([]byte, 0, 4096)
	}}
}

// freeBuf returns a frame buffer to the pool, resetting its length so the
// capacity is reused by the next getBuf.
func freeBuf(buf []byte) {
	buf = buf[:0]
	_bufpool.Put(buf)
}

// getBuf fetches a zero-length, pre-allocated buffer from the pool.
func getBuf() []byte {
	return _bufpool.Get().([]byte)
}

// Deliver deliver span to dapper-service through tcp
type Deliver struct {
	servers []string                // candidate collector addresses; one is chosen at random per dial
	readFn  func() ([]byte, error)  // pull-style data source supplied by the caller
	conn    *net.TCPConn
	dataCh  chan []byte   // unbuffered hand-off between fetch and loop
	closeCh chan struct{} // buffered(1) stop signal for loop
	closed  bool
}

// New Deliver
// New builds a Deliver that continuously pulls payloads from readFn and
// streams them, framed, to one of the given servers. It dials immediately
// and returns any dial error.
func New(servers []string, readFn func() ([]byte, error)) (*Deliver, error) {
	if len(servers) == 0 {
		return nil, fmt.Errorf("no server provide")
	}
	d := &Deliver{
		servers: servers,
		readFn:  readFn,
		closeCh: make(chan struct{}, 1),
		dataCh:  make(chan []byte),
	}
	return d, d.start()
}
// start dials the first server and launches the producer (fetch) and
// consumer (loop) goroutines.
func (d *Deliver) start() error {
	if err := d.dial(); err != nil {
		return err
	}
	go d.fetch()
	go d.loop()
	return nil
}

// fetch pulls payloads from readFn and hands them to loop via dataCh until
// the Deliver is closed.
// NOTE(review): a persistently failing readFn turns this into a busy loop —
// consider a small backoff if that can happen in practice.
func (d *Deliver) fetch() {
	for {
		if d.closed {
			return
		}
		data, err := d.readFn()
		if err != nil {
			log.Error("deliver read data error: %s", err)
			continue
		}
		d.dataCh <- data
	}
}

// loop frames each payload and writes it to the connection, re-dialing and
// retrying the same frame (goto send) until the write succeeds.
func (d *Deliver) loop() {
	for {
		select {
		case <-d.closeCh:
			return
		case data := <-d.dataCh:
			data = warpData(data)
		send:
			_, err := d.conn.Write(data)
			if err == nil {
				freeBuf(data)
				continue
			}
			d.reDial()
			goto send
		}
	}
}
// Close deliver.
// Close stops the loop goroutine, then gives fetch up to 50ms to hand over
// one last in-flight payload, which is flushed to the connection with the
// same framing the loop uses. Returns an error only on a real failure.
func (d *Deliver) Close() error {
	if d.closed {
		return fmt.Errorf("already closed")
	}
	d.closed = true
	d.closeCh <- struct{}{}
	timer := time.NewTimer(50 * time.Millisecond)
	defer timer.Stop()
	select {
	case data := <-d.dataCh:
		// Bug fixes: the original (a) wrote the payload unframed, which the
		// collector's tailPacket would discard, and (b) wrapped a nil err in
		// fmt.Errorf, so Close reported an error even on success.
		wrapped := warpData(data)
		_, err := d.conn.Write(wrapped)
		freeBuf(wrapped)
		if err != nil {
			return fmt.Errorf("write last data error: %s", err)
		}
		return nil
	case <-timer.C:
		return nil
	}
}
// reDial drops the current connection (if any) and keeps dialing — retrying
// once per second — until a connection is established.
func (d *Deliver) reDial() {
	if d.conn != nil {
		d.conn.Close()
	}
	for {
		if err := d.dial(); err != nil {
			log.Error("redial error: %s, retry after second", err)
			time.Sleep(time.Second)
			// Bug fix: the original fell through to an unconditional break,
			// so a failed dial was never actually retried and loop() would
			// spin writing to a dead connection.
			continue
		}
		return
	}
}

// dial connects to a randomly chosen server and enables TCP keep-alive.
func (d *Deliver) dial() error {
	server := chioceServer(d.servers)
	conn, err := net.Dial("tcp", server)
	if err != nil {
		return fmt.Errorf("dial tcp://%s error: %s", server, err)
	}
	d.conn = conn.(*net.TCPConn)
	d.conn.SetKeepAlive(true)
	return nil
}

// chioceServer picks a server uniformly at random.
// NOTE(review): name is a typo for "chooseServer"; kept for compatibility
// with existing callers in this package.
func chioceServer(servers []string) string {
	return servers[rand.Intn(len(servers))]
}
// warpData frames a payload for the wire: 2-byte magic, 4-byte big-endian
// length (payload length + 2, covering the two padding bytes), 2 padding
// bytes, then the payload. The returned buffer comes from the pool; release
// it with freeBuf after writing.
func warpData(data []byte) []byte {
	out := getBuf()
	out = append(out, _magicBuf...)
	var header [6]byte
	binary.BigEndian.PutUint32(header[:4], uint32(len(data)+2))
	out = append(out, header[:]...)
	out = append(out, data...)
	return out
}

View File

@@ -0,0 +1,45 @@
package deliver
import (
"bytes"
"encoding/binary"
"io"
"net"
"testing"
"time"
)
// TestDeliver spins up a local TCP sink, pushes one payload through a
// Deliver, and asserts the bytes on the wire match warpData's framing:
// magic(2) | length=len(data)+2 (4, big-endian) | padding(2) | payload.
func TestDeliver(t *testing.T) {
	buf := &bytes.Buffer{}
	lis, err := net.Listen("tcp", "127.0.0.1:12233")
	if err != nil {
		t.Fatal(err)
	}
	go func() {
		conn, err := lis.Accept()
		if err != nil {
			// Bug fix: t.Fatal must not be called from a non-test goroutine
			// (testing package rule); report and bail out instead.
			t.Error(err)
			return
		}
		io.Copy(buf, conn)
	}()
	data := []byte("hello world")
	readed := make(chan bool, 1)
	d, err := New([]string{"127.0.0.1:12233"}, func() ([]byte, error) {
		readed <- true
		return data, nil
	})
	if err != nil {
		t.Fatal(err)
	}
	time.Sleep(500 * time.Millisecond)
	if !bytes.Equal(buf.Bytes()[0:2], _magicBuf) {
		t.Error("invalid data, wrong magic header")
	}
	// Bug fix: the original asserted length == len(data) and payload at
	// offset 6, but warpData writes len(data)+2 and an 8-byte header, so the
	// test contradicted the implementation it exercises (see tailPacket on
	// the collector side, which skips headerSize+magicSize bytes).
	if int(binary.BigEndian.Uint32(buf.Bytes()[2:6])) != len(data)+2 {
		t.Error("wrong data length")
	}
	if len(buf.Bytes()) < 8+len(data) || !bytes.Equal(buf.Bytes()[8:8+len(data)], data) {
		t.Errorf("invalid content %s", buf.Bytes()[8:])
	}
	d.Close()
}

View File

@@ -0,0 +1,43 @@
# Bazel BUILD file for the diskqueue package (gazelle-managed; "automanaged"
# targets may be regenerated).
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_test",
    "go_library",
)

# Unit tests for both the bucket primitives and the queue itself.
go_test(
    name = "go_default_test",
    srcs = [
        "bucket_test.go",
        "diskqueue_test.go",
    ],
    embed = [":go_default_library"],
    rundir = ".",
    tags = ["automanaged"],
)

# The diskqueue library: a memory-buffered queue that spills to disk.
go_library(
    name = "go_default_library",
    srcs = [
        "bucket.go",
        "diskqueue.go",
    ],
    importpath = "go-common/app/service/main/dapper/pkg/diskqueue",
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)

# Source globs used by the repo-wide //...:all-srcs aggregation.
filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,200 @@
package diskqueue
import (
"bufio"
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"os"
"sync"
)
// Storage layout: a bucket is an array of fixed 512-byte blocks. Each block
// starts with a 2-byte big-endian header, leaving 510 payload bytes.
const (
	_blockByte int32 = 512
	_lenByte   int32 = 2
	_dataByte        = _blockByte - _lenByte
)

// errBucketFull signals the caller to roll over to a new bucket.
var errBucketFull = errors.New("bucket is full or not enough")

// Block headers: values <= _dataByte (510) are a literal payload length and
// terminate a record. fullHeader is exactly 510 (a full final block);
// nextHeader is 511 and marks a continuation block.
var fullHeader = []byte{1, 254}
var nextHeader = []byte{1, 255}

// magicHeader prefixes dumped bucket files ("DQ").
var magicHeader = []byte{'D', 'Q'}

// memBucketPool recycles the large backing arrays of memBuckets.
type memBucketPool struct {
	cap  int32 // blocks per bucket, derived from bucketByte
	pool sync.Pool
}

func newMemBucketPool(bucketByte int32) *memBucketPool {
	return &memBucketPool{
		pool: sync.Pool{New: func() interface{} {
			return make([]byte, bucketByte)
		}},
		cap: bucketByte / _blockByte,
	}
}

// new hands out a fresh (zero-position) bucket over pooled storage.
func (m *memBucketPool) new() *memBucket {
	data := m.pool.Get().([]byte)
	return &memBucket{data: data, cap: m.cap}
}

// free returns a bucket's backing array to the pool.
func (m *memBucketPool) free(bucket *memBucket) {
	m.pool.Put(bucket.data)
}

// memBucket is an append-only FIFO of records over fixed-size blocks.
// readAt/writeAt are block indices, guarded by the embedded mutex.
type memBucket struct {
	sync.Mutex
	cap     int32
	readAt  int32
	writeAt int32
	data    []byte
}
// push appends one record. Records shorter than a block get a single block
// whose header is the payload length; longer records are split into
// continuation blocks (nextHeader), finished either by a fullHeader block
// (length divisible by 510) or a final length-headed partial block.
// Returns errBucketFull when the remaining blocks cannot hold p.
func (m *memBucket) push(p []byte) error {
	m.Lock()
	defer m.Unlock()
	length := int32(len(p))
	if length > _dataByte*(m.cap-m.writeAt) {
		return errBucketFull
	}
	// if p length < blockbyte write it direct
	if length < _dataByte {
		ds := m.writeAt * _blockByte
		binary.BigEndian.PutUint16(m.data[ds:], uint16(length))
		copy(m.data[ds+_lenByte:], p)
		m.writeAt++
		return nil
	}
	// loop write block
	blocks := length / _dataByte
	re := length % _dataByte
	var i int32
	// All but the last full chunk are plain continuation blocks.
	for i = 0; i < blocks-1; i++ {
		ds := m.writeAt * _blockByte
		copy(m.data[ds:], nextHeader)
		ps := i * _dataByte
		copy(m.data[ds+_lenByte:], p[ps:ps+_dataByte])
		m.writeAt++
	}
	// The last full chunk terminates the record iff nothing remains.
	var nh []byte
	if re == 0 {
		nh = fullHeader
	} else {
		nh = nextHeader
	}
	ds := m.writeAt * _blockByte
	copy(m.data[ds:], nh)
	ps := (blocks - 1) * _dataByte
	copy(m.data[ds+_lenByte:], p[ps:ps+_dataByte])
	m.writeAt++
	// Trailing partial chunk carries its own literal length header.
	if re != 0 {
		ds := m.writeAt * _blockByte
		binary.BigEndian.PutUint16(m.data[ds:], uint16(re))
		copy(m.data[ds+_lenByte:], p[blocks*_dataByte:])
		m.writeAt++
	}
	return nil
}

// pop removes and returns the oldest record, reassembling continuation
// blocks until a terminating (length <= 510) header is seen. Returns io.EOF
// when the bucket is drained.
func (m *memBucket) pop() ([]byte, error) {
	m.Lock()
	defer m.Unlock()
	if m.readAt >= m.writeAt {
		return nil, io.EOF
	}
	ret := make([]byte, 0, _blockByte)
	for m.readAt < m.writeAt {
		ds := m.readAt * _blockByte
		m.readAt++
		l := int32(binary.BigEndian.Uint16(m.data[ds : ds+_lenByte]))
		if l <= _dataByte {
			// Terminating block: append exactly l payload bytes and stop.
			ret = append(ret, m.data[ds+_lenByte:ds+_lenByte+l]...)
			break
		}
		// Continuation block: full 510-byte payload, keep reading.
		ret = append(ret, m.data[ds+_lenByte:ds+_blockByte]...)
	}
	return ret, nil
}

// dump serializes the bucket to w: "DQ" magic, readAt, writeAt (both 4-byte
// big-endian), then the written blocks only. Readable by newFileBucket.
func (m *memBucket) dump(w io.Writer) (int, error) {
	header := make([]byte, 10)
	copy(header, magicHeader)
	binary.BigEndian.PutUint32(header[2:6], uint32(m.readAt))
	binary.BigEndian.PutUint32(header[6:10], uint32(m.writeAt))
	n1, err := w.Write(header)
	if err != nil {
		return n1, err
	}
	n2, err := w.Write(m.data[:m.writeAt*_blockByte])
	return n1 + n2, err
}
// newFileBucket opens a bucket file produced by memBucket.dump, validates
// the "DQ" magic, restores the read/write block positions, and seeks past
// the blocks that were already consumed before the dump.
func newFileBucket(fpath string) (*fileBucket, error) {
	fp, err := os.Open(fpath)
	if err != nil {
		return nil, err
	}
	header := make([]byte, 10)
	// Bug fixes: (a) a single fp.Read may legally return fewer than 10 bytes
	// on a valid file — io.ReadFull retries and distinguishes a truncated
	// header (ErrUnexpectedEOF); (b) every error path below previously
	// leaked the open file descriptor.
	if _, err = io.ReadFull(fp, header); err != nil {
		fp.Close()
		return nil, err
	}
	if !bytes.Equal(header[:2], magicHeader) {
		fp.Close()
		return nil, fmt.Errorf("invalid magic %s", header[:2])
	}
	readAt := int32(binary.BigEndian.Uint32(header[2:6]))
	writeAt := int32(binary.BigEndian.Uint32(header[6:10]))
	// io.SeekCurrent replaces the deprecated os.SEEK_CUR constant.
	if _, err = fp.Seek(int64(readAt*_blockByte), io.SeekCurrent); err != nil {
		fp.Close()
		return nil, err
	}
	return &fileBucket{
		fp:      fp,
		readAt:  readAt,
		writeAt: writeAt,
		bufRd:   bufio.NewReader(fp),
	}, nil
}
// fileBucket is the read-only, on-disk counterpart of memBucket; records are
// streamed back block by block through a buffered reader.
type fileBucket struct {
	sync.Mutex
	fp      *os.File
	readAt  int32 // blocks consumed so far
	writeAt int32 // total blocks in the file (from the dump header)
	bufRd   *bufio.Reader
}

// pop reads and reassembles the next record, following the same block
// format as memBucket.pop. Returns io.EOF when all blocks are consumed.
func (f *fileBucket) pop() ([]byte, error) {
	f.Lock()
	defer f.Unlock()
	if f.readAt >= f.writeAt {
		return nil, io.EOF
	}
	ret := make([]byte, 0, _blockByte)
	block := make([]byte, _blockByte)
	for f.readAt < f.writeAt {
		// Bug fix: bufio.Reader.Read may return fewer than len(block) bytes
		// even in the middle of a healthy file; the original treated any
		// short read as corruption. io.ReadFull reads a whole block or
		// reports a genuine truncation.
		if _, err := io.ReadFull(f.bufRd, block); err != nil {
			return nil, err
		}
		// Advance the consumed-block cursor (memBucket.pop does the same;
		// the original never advanced it and relied on hitting file EOF).
		f.readAt++
		l := int32(binary.BigEndian.Uint16(block[:2]))
		if l <= _dataByte {
			ret = append(ret, block[2:2+l]...)
			break
		}
		ret = append(ret, block[2:_blockByte]...)
	}
	return ret, nil
}

// close releases the underlying file.
func (f *fileBucket) close() error {
	return f.fp.Close()
}

View File

@@ -0,0 +1,105 @@
package diskqueue
import (
"crypto/rand"
"io"
"os"
"reflect"
"testing"
)
// Test_membucket exercises the in-memory bucket: small single-block records,
// multi-block records, capacity rejection, and EOF on an empty bucket.
func Test_membucket(t *testing.T) {
	cap := int32(16)
	data := make([]byte, _blockByte*cap)
	mb := &memBucket{
		cap:  cap,
		data: data,
	}
	t.Run("test push & pop small data", func(t *testing.T) {
		p := []byte("hello world")
		err := mb.push(p)
		if err != nil {
			t.Error(err)
		}
		ret, err := mb.pop()
		if err != nil {
			t.Error(err)
		} else {
			if !reflect.DeepEqual(ret, p) {
				t.Errorf("%s not equal %s", ret, p)
			}
		}
	})
	t.Run("test push & pop big data", func(t *testing.T) {
		// 1890 bytes spans multiple 510-byte payload blocks.
		p := make([]byte, 1890)
		rand.Read(p)
		err := mb.push(p)
		if err != nil {
			t.Error(err)
		}
		ret, err := mb.pop()
		if err != nil {
			t.Error(err)
		} else {
			if !reflect.DeepEqual(ret, p) {
				t.Logf("buf: %v", mb.data)
				t.Errorf("%v not equal %v", ret, p)
			}
		}
	})
	t.Run("push big data", func(t *testing.T) {
		// Twice the bucket capacity must be rejected with errBucketFull.
		p := make([]byte, _blockByte*cap*2)
		err := mb.push(p)
		if err != errBucketFull {
			t.Errorf("expect err == errBucketFull get: %v", err)
		}
	})
	t.Run("pop io.EOF", func(t *testing.T) {
		_, err := mb.pop()
		if err != io.EOF {
			t.Errorf("expect err == io.EOF, get %v", err)
		}
	})
}

// Test_fileBucket round-trips records through dump + newFileBucket: write 10
// records to a memBucket, dump to disk, then pop them all back.
func Test_fileBucket(t *testing.T) {
	fpath := "bucket.bin"
	defer os.RemoveAll(fpath)
	cap := int32(16)
	data := make([]byte, _blockByte*cap)
	mb := &memBucket{
		cap:  cap,
		data: data,
	}
	d1 := []byte("hello world")
	for i := 0; i < 10; i++ {
		mb.push(d1)
	}
	fp, err := os.OpenFile(fpath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		t.Fatal(err)
	}
	mb.dump(fp)
	fp.Close()
	fb, err := newFileBucket(fpath)
	if err != nil {
		t.Fatal(err)
	}
	count := 0
	for {
		ret, err := fb.pop()
		if err != nil {
			// io.EOF is the expected terminator; anything else is a failure.
			if err != io.EOF {
				t.Error(err)
			}
			break
		}
		count++
		if !reflect.DeepEqual(ret, d1) {
			t.Errorf("%v not equal %v", ret, d1)
		}
	}
	if count != 10 {
		t.Errorf("expect 10 data get %d", count)
	}
}

View File

@@ -0,0 +1,460 @@
package diskqueue
import (
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path"
"sort"
"strconv"
"strings"
"sync"
"sync/atomic"
)
const (
	// max memory use equals BucketByte * (MemBucket + DynamicMemBucket)
	_defaultBucketByte       = _blockByte * 2 * 1024 * 16 // 16MB
	_defaultMemBucket        = 1
	_defaultDynamicMemBucket = 3
	_filePrefix              = "disk_queue_" // on-disk bucket file prefix
)

// node status
const (
	_inmem int8 = iota // bucket lives in memory
	_indisk            // bucket flushed to a file
	_freed             // bucket fully consumed, resources released
)

// _globalID is the monotonically increasing node id; seeded from the newest
// file on disk when a queue is restored (see loadFromFile).
var _globalID int64

// ErrQueueFull .
var ErrQueueFull = errors.New("error queue is full, can't create new membucket")

// nextNodeID atomically allocates the next node id.
func nextNodeID() int64 {
	return atomic.AddInt64(&_globalID, 1)
}

// DiskQueue disk queue
// DiskQueue is a FIFO byte queue that buffers in memory and spills to disk.
// Pop returns io.EOF when the queue is currently empty.
type DiskQueue interface {
	Push(p []byte) error
	Pop() ([]byte, error)
	Close() error
}

// Option Ringbuffer option
type Option func(opt *option)

// SetBucketByte bucketbyte
// SetBucketByte sets the bucket size in bytes, rounded down to a whole
// number of 512-byte blocks (values < 512 round to 0 and fail validation).
func SetBucketByte(n int) Option {
	return func(opt *option) {
		opt.bucketByte = (int32(n) / _blockByte) * _blockByte
	}
}

// SetMemBucket set the number of mem bucket
func SetMemBucket(n int) Option {
	return func(opt *option) {
		opt.memBucket = int32(n)
	}
}

// SetDynamicMemBucket set the number of dynamic mem bucket
func SetDynamicMemBucket(n int) Option {
	return func(opt *option) {
		opt.dynamicMemBucket = int32(n)
	}
}

// SetMaxBucket set the number of max bucket 0 represent unlimit
func SetMaxBucket(n int) Option {
	return func(opt *option) {
		opt.maxBucket = int32(n)
	}
}

// option is the resolved queue configuration.
type option struct {
	bucketByte       int32 // bytes per bucket (multiple of _blockByte)
	memBucket        int32 // steady-state in-memory buckets
	maxBucket        int32 // flush-to-disk threshold; 0 = flush eagerly
	dynamicMemBucket int32 // extra burst buckets allowed in memory
	fpath            string
}

// validate rejects non-positive sizing parameters.
func (o option) validate() error {
	if o.bucketByte <= 0 {
		return fmt.Errorf("bucket byte must > 0")
	}
	if o.memBucket <= 0 {
		return fmt.Errorf("mem bucket must > 0")
	}
	if o.dynamicMemBucket <= 0 {
		return fmt.Errorf("dynamic mem bucket must > 0")
	}
	return nil
}

var _defaultOpt = option{
	bucketByte:       _defaultBucketByte,
	memBucket:        _defaultMemBucket,
	dynamicMemBucket: _defaultDynamicMemBucket,
}
// New Ringbuffer
// New creates (or restores, if bucket files already exist) a DiskQueue
// rooted at directory fpath, creating the directory when missing.
func New(fpath string, options ...Option) (DiskQueue, error) {
	info, err := os.Stat(fpath)
	if err != nil {
		if !os.IsNotExist(err) {
			return nil, fmt.Errorf("stat %s error: %s", fpath, err)
		}
		if err = os.MkdirAll(fpath, 0755); err != nil {
			return nil, fmt.Errorf("fpath %s not exists try create directory error: %s", fpath, err)
		}
	} else if !info.IsDir() {
		return nil, fmt.Errorf("fpath: %s already exists and not a directory", fpath)
	}
	// TODO: check permission
	opt := _defaultOpt
	opt.fpath = fpath
	for _, fn := range options {
		fn(&opt)
	}
	if err = opt.validate(); err != nil {
		return nil, err
	}
	b := &base{
		opt: opt,
	}
	// Bug fix: the original returned (nil, nil) whenever SetMaxBucket was
	// used (opt.maxBucket != 0), handing callers a nil DiskQueue with no
	// error and a guaranteed panic on first use. maxBucket only tunes when
	// buckets get flushed to disk (see moveTail/notifyStore), so the queue
	// is constructed identically in both cases.
	return &queue{base: b}, b.init()
}
// node is one link of the queue's bucket list. A node's data lives either in
// bucket (in-memory) or in the file at fpath (lazily opened into fbucket).
type node struct {
	id       int64
	mx       sync.Mutex
	flushing bool // set while storeNode is writing this node to disk
	bucket   *memBucket
	next     *node
	fpath    string
	fbucket  *fileBucket
	kind     int8 // _inmem, _indisk or _freed
}

// setFlushing toggles the flushing flag under the node lock.
func (n *node) setFlushing(flushing bool) {
	n.mx.Lock()
	n.flushing = flushing
	n.mx.Unlock()
}

// pop reads the next record from this node, preferring the in-memory bucket
// and lazily opening the on-disk bucket otherwise.
func (n *node) pop() ([]byte, error) {
	n.mx.Lock()
	defer n.mx.Unlock()
	if n.bucket != nil {
		return n.bucket.pop()
	}
	var err error
	if n.fbucket == nil {
		if n.fbucket, err = newFileBucket(n.fpath); err != nil {
			return nil, err
		}
	}
	return n.fbucket.pop()
}

// base owns the node list (head = oldest, tail = newest) and the bucket pool.
type base struct {
	opt       option
	head      *node
	tail      *node
	pool      *memBucketPool
	length    int32 // approximate number of queued records
	memBucket int32 // in-memory buckets currently allocated
}

// init restores persisted state if bucket files exist, otherwise starts an
// empty single-node queue.
func (b *base) init() error {
	b.pool = newMemBucketPool(b.opt.bucketByte)
	if loaded, err := b.loadFromFile(); err != nil || loaded {
		return err
	}
	current := &node{
		id:     nextNodeID(),
		bucket: b.pool.new(),
	}
	b.head = current
	b.tail = current
	return nil
}

// loadFromFile rebuilds the node chain from disk_queue_* files, ordered by
// the numeric id in their names, then appends a fresh in-memory tail node.
// Returns (false, nil) when no files are present.
// NOTE(review): nodeID panics on a malformed file name in the queue
// directory; acceptable only if the directory is fully owned by this queue.
func (b *base) loadFromFile() (bool, error) {
	infos, err := ioutil.ReadDir(b.opt.fpath)
	if err != nil {
		return false, fmt.Errorf("readdir %s error: %s", b.opt.fpath, err)
	}
	var files []string
	for _, info := range infos {
		if info.IsDir() || !strings.HasPrefix(info.Name(), _filePrefix) {
			continue
		}
		files = append(files, path.Join(b.opt.fpath, info.Name()))
	}
	if len(files) == 0 {
		return false, nil
	}
	nodeID := func(name string) int64 {
		id, err := strconv.ParseInt(path.Base(name)[len(_filePrefix):], 10, 64)
		if err != nil {
			panic(fmt.Errorf("invalid file name: %s error: %s", name, err))
		}
		return id
	}
	sort.Slice(files, func(i int, j int) bool {
		return nodeID(files[i]) < nodeID(files[j])
	})
	// Continue id allocation after the newest persisted node.
	_globalID = nodeID(files[len(files)-1])
	current := &node{
		id:    nodeID(files[0]),
		fpath: files[0],
		kind:  _indisk,
	}
	b.head = current
	for _, file := range files[1:] {
		next := &node{
			id:    nodeID(file),
			fpath: file,
			kind:  _indisk,
		}
		current.next = next
		current = next
	}
	// One fresh in-memory bucket becomes the new tail for writes.
	b.memBucket = 1
	next := &node{
		id:     nextNodeID(),
		bucket: b.pool.new(),
	}
	current.next = next
	current = next
	b.tail = current
	return true, nil
}
// queue is the DiskQueue implementation over base's node chain.
type queue struct {
	*base
	mx     sync.Mutex // guards head/tail pointer moves
	closed bool
	lastID int64 // highest node id already scheduled for flushing
	wg     sync.WaitGroup
}

// Push appends p, rolling over to a new in-memory bucket (and possibly
// triggering a disk flush) when the current tail bucket is full. p must be
// smaller than one bucket.
func (q *queue) Push(p []byte) (err error) {
	if len(p) >= int(q.opt.bucketByte) {
		return fmt.Errorf("data too large")
	}
	if q.closed {
		return fmt.Errorf("queue already closed")
	}
	for {
		err = q.tail.bucket.push(p)
		if err == nil {
			atomic.AddInt32(&q.length, 1)
			return
		}
		if err == errBucketFull {
			if err = q.moveTail(); err != nil {
				return err
			}
			// Retry the push against the new tail bucket.
			continue
		}
		return
	}
}

// moveTail appends a fresh in-memory bucket node as the new tail, failing
// when the memory budget (memBucket + dynamicMemBucket) is exhausted.
// Crossing maxBucket schedules a background flush of an older bucket.
func (q *queue) moveTail() error {
	bucket := atomic.LoadInt32(&q.memBucket)
	if bucket >= q.opt.memBucket+q.opt.dynamicMemBucket {
		return fmt.Errorf("can't assign memory bucket any more")
	}
	if bucket >= q.opt.maxBucket {
		q.notifyStore()
	}
	// take tail snapshot
	p := q.tail
	// lock queue
	q.mx.Lock()
	defer q.mx.Unlock()
	// tail alreay changed: another pusher already rolled over; retry there.
	if p != q.tail {
		return nil
	}
	atomic.AddInt32(&q.memBucket, 1)
	n := &node{
		id:     nextNodeID(),
		bucket: q.pool.new(),
		kind:   _inmem,
	}
	// move to new tail
	q.tail.next = n
	q.tail = n
	return nil
}

// notifyStore picks the oldest in-memory node that is not the current read
// head and has not been scheduled yet, and flushes it to disk asynchronously.
func (q *queue) notifyStore() {
	n := q.head
	for n.next != nil {
		read := q.head
		if n.id > q.lastID && n.kind != _indisk && n != read {
			q.lastID = n.id
			go q.storeNode(n)
			return
		}
		n = n.next
	}
}
// Pop removes and returns the oldest record. It returns io.EOF when the
// queue is currently empty (the caller may retry later).
func (q *queue) Pop() (data []byte, err error) {
	// Bug fix: the original decremented the length counter when err != nil,
	// i.e. on every *failed* pop, and never on success — exactly inverted.
	// Decrement only when a record actually left the queue.
	defer func() {
		if err == nil {
			atomic.AddInt32(&q.length, -1)
		}
	}()
	if q.closed {
		return nil, fmt.Errorf("queue already closed")
	}
	data, err = q.head.pop()
	if err != nil {
		if err == io.EOF {
			// Head bucket drained: advance to the next node and retry once.
			if err = q.moveHead(); err != nil {
				return nil, err
			}
			return q.head.pop()
		}
		return nil, err
	}
	return data, nil
}

// moveHead frees the drained head node and advances head to its successor;
// io.EOF means the whole queue is empty (head == tail).
func (q *queue) moveHead() error {
	tail := q.tail
	if q.head == tail {
		return io.EOF
	}
	// move head to next
	q.mx.Lock()
	head := q.head
	q.head = q.head.next
	q.mx.Unlock()
	// reset head to new read node
	q.freeNode(head)
	return nil
}
// freeNode releases a fully-consumed node: returns its memory bucket to the
// pool, closes/removes any on-disk bucket. If a background flush is in
// progress, it only marks the node _freed and lets storeNode finish cleanup.
func (q *queue) freeNode(n *node) {
	n.mx.Lock()
	defer n.mx.Unlock()
	if n.flushing {
		n.kind = _freed
		return
	}
	if n.bucket != nil {
		q.freeBucket(n.bucket)
		n.bucket = nil
	}
	if n.fbucket != nil {
		n.fbucket.close()
	}
	if n.fpath != "" {
		// Removal failure is deliberately ignored (best-effort cleanup).
		if err := os.Remove(n.fpath); err != nil {
			//fmt.Fprintf(os.Stderr, "[ERROR] diskqueue: remove file %s error: %s", n.fpath, err)
		}
	}
}

// storeNode flushes one in-memory node to disk (runs in its own goroutine,
// tracked by q.wg). If the node was consumed while flushing (kind == _freed)
// the bucket and the freshly written file are discarded again.
func (q *queue) storeNode(n *node) (err error) {
	fpath := storePath(q.opt.fpath, n)
	q.wg.Add(1)
	defer q.wg.Done()
	n.setFlushing(true)
	// if node already free return direct
	if n.bucket == nil {
		return
	}
	// if node be freed just release membucket
	if n.kind == _freed {
		q.freeBucket(n.bucket)
		return
	}
	// store bucket to disk
	if err = store(fpath, n); err != nil {
		fmt.Fprintf(os.Stderr, "[ERROR] diskqueue: store node error: %s", err)
	}
	n.fpath = fpath
	n.setFlushing(false)
	if n.kind == _freed {
		q.freeBucket(n.bucket)
		n.bucket = nil
		if err := os.Remove(fpath); err != nil {
			//fmt.Fprintf(os.Stderr, "[ERROR] diskqueue: remove file %s error: %s", n.fpath, err)
		}
		return
	}
	n.kind = _indisk
	q.mx.Lock()
	// Keep the memory copy only while this node is still the read head;
	// otherwise reads will come from the file.
	if q.head != n {
		q.freeBucket(n.bucket)
		n.bucket = nil
	}
	q.mx.Unlock()
	return
}

// freeBucket returns a bucket to the pool and updates the memory accounting.
func (q *queue) freeBucket(bucket *memBucket) {
	q.pool.free(bucket)
	atomic.AddInt32(&q.memBucket, -1)
}

// Close marks the queue closed, waits for in-flight flushes, then persists
// every remaining in-memory bucket so the queue can be restored by New.
func (q *queue) Close() error {
	// set closed
	q.closed = true
	// wait all store goroutines finish
	q.wg.Wait()
	var messages []string
	// store all leave node
	current := q.head
	for current != nil {
		if current.kind == _inmem && current.bucket != nil {
			fpath := storePath(q.opt.fpath, current)
			if err := store(fpath, current); err != nil {
				messages = append(messages, err.Error())
			}
		}
		current = current.next
	}
	if len(messages) == 0 {
		return nil
	}
	return fmt.Errorf("close queue error: %s", strings.Join(messages, "; "))
}
// store dumps node n's in-memory bucket to fpath via memBucket.dump.
// Buckets with no unread data are skipped.
func store(fpath string, n *node) (err error) {
	// ignore empty bucket
	if n.bucket.writeAt == n.bucket.readAt {
		return nil
	}
	var fp *os.File
	fp, err = os.OpenFile(fpath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
	if err != nil {
		return fmt.Errorf("open file %s error: %s", fpath, err)
	}
	// Bug fix: the original never closed fp, leaking one file descriptor per
	// flushed bucket. Close it and surface a close error when the dump
	// itself succeeded (data durability).
	defer func() {
		if cerr := fp.Close(); cerr != nil && err == nil {
			err = fmt.Errorf("close file %s error: %s", fpath, cerr)
		}
	}()
	if _, err = n.bucket.dump(fp); err != nil {
		return fmt.Errorf("dump data to file %s error: %s", fpath, err)
	}
	return
}

// storePath builds the on-disk file name for node n inside directory base.
func storePath(base string, n *node) string {
	return path.Join(base, _filePrefix+strconv.FormatInt(n.id, 10))
}

View File

@@ -0,0 +1,206 @@
package diskqueue
import (
"bytes"
"crypto/rand"
"io"
mrand "math/rand"
"os"
"os/exec"
"sync"
"testing"
"time"
)
func init() {
	// Seed math/rand used for the jittered sleeps in the transfer test.
	mrand.Seed(time.Now().UnixNano())
}

// TestDiskQueuePushPopMem round-trips N small records entirely in memory and
// checks nothing is lost, reordered or corrupted.
func TestDiskQueuePushPopMem(t *testing.T) {
	dirname := "testdata/d1"
	defer os.RemoveAll(dirname)
	queue, err := New(dirname)
	if err != nil {
		t.Fatal(err)
	}
	N := 10
	p := []byte("hello world")
	for i := 0; i < N; i++ {
		if err := queue.Push(p); err != nil {
			t.Error(err)
		}
	}
	count := 0
	for {
		data, err := queue.Pop()
		if err == io.EOF {
			break
		}
		if err != nil {
			t.Error(err)
		}
		if !bytes.Equal(data, p) {
			t.Errorf("invalid data: %s", data)
		}
		count++
	}
	if count != N {
		t.Errorf("wrong count %d", count)
	}
}

// TestDiskQueueDisk writes enough data to force spilling to disk, closes the
// queue, reopens it from the same directory, and verifies every record.
func TestDiskQueueDisk(t *testing.T) {
	data := make([]byte, 2233)
	rand.Read(data)
	count := 1024 * 256
	dirname := "testdata/d2"
	defer os.RemoveAll(dirname)
	t.Run("test write disk", func(t *testing.T) {
		queue, err := New(dirname)
		if err != nil {
			t.Fatal(err)
		}
		for i := 0; i < count; i++ {
			if err := queue.Push(data); err != nil {
				// Memory budget exhausted: give the background flush a
				// second to free a bucket, then retry once.
				time.Sleep(time.Second)
				if err := queue.Push(data); err != nil {
					t.Error(err)
				}
			}
		}
		queue.Close()
	})
	t.Run("test read disk", func(t *testing.T) {
		n := 0
		queue, err := New(dirname)
		if err != nil {
			t.Fatal(err)
		}
		for {
			ret, err := queue.Pop()
			if err == io.EOF {
				break
			}
			if !bytes.Equal(data, ret) {
				t.Errorf("invalid data unequal")
			}
			n++
		}
		if n != count {
			t.Errorf("want %d get %d", count, n)
		}
	})
}
// TestDiskQueueTrans runs a concurrent producer (bursty, jittered) and a
// consumer against the same queue and checks every pushed record is popped.
// NOTE(review): `done`, `writed` and `readed` are shared between goroutines
// without synchronization — `go test -race` will likely flag this; the test
// relies on wg.Wait() ordering for the final comparison.
func TestDiskQueueTrans(t *testing.T) {
	dirname := "testdata/d3"
	defer os.RemoveAll(dirname)
	queue, err := New(dirname)
	if err != nil {
		t.Fatal(err)
	}
	data := make([]byte, 1890)
	rand.Read(data)
	cycles := 512
	var wg sync.WaitGroup
	wg.Add(2)
	done := false
	writed := 0
	readed := 0
	go func() {
		defer wg.Done()
		// Producer: bursts of 128 pushes separated by a 10-50ms pause.
		for i := 0; i < cycles; i++ {
			ms := mrand.Intn(40) + 10
			time.Sleep(time.Duration(ms) * time.Millisecond)
			for i := 0; i < 128; i++ {
				if err := queue.Push(data); err != nil {
					t.Error(err)
				} else {
					writed++
				}
			}
		}
		done = true
	}()
	go func() {
		defer wg.Done()
		// Consumer: spins on Pop, backing off briefly on an empty queue,
		// and exits once the producer is finished and the queue drains.
		for {
			ret, err := queue.Pop()
			if err == io.EOF && done {
				break
			}
			if err == io.EOF {
				ms := mrand.Intn(10)
				time.Sleep(time.Duration(ms) * time.Millisecond)
				continue
			}
			if !bytes.Equal(ret, data) {
				t.Fatalf("invalid data, data length: %d, want: %d, data: %v, want: %v", len(ret), len(data), ret, data)
			}
			readed++
		}
	}()
	wg.Wait()
	os.RemoveAll(dirname)
	if writed != readed {
		t.Errorf("readed: %d != writed: %d", readed, writed)
	}
}
// TestEmpty confirms that Pop on a brand-new, never-written queue keeps
// returning io.EOF.
func TestEmpty(t *testing.T) {
	dirname := "testdata/d4"
	defer os.RemoveAll(dirname)
	queue, err := New(dirname)
	if err != nil {
		t.Fatal(err)
	}
	for attempt := 0; attempt < 5; attempt++ {
		if _, err := queue.Pop(); err != io.EOF {
			t.Errorf("expect err == io.EOF, get %v", err)
		}
	}
}
// TestEmptyCache opens a queue over a pre-built fixture directory
// (testdata/emptycache) and checks that it is treated as empty.
func TestEmptyCache(t *testing.T) {
	datadir := "testdata/emptycache"
	dirname := "testdata/de"
	// copy the fixture so the test never mutates the checked-in data
	// (NOTE(review): shelling out to `cp` makes this test POSIX-only)
	if err := exec.Command("cp", "-r", datadir, dirname).Run(); err != nil {
		t.Error(err)
	}
	defer os.RemoveAll(dirname)
	queue, err := New(dirname)
	if err != nil {
		t.Fatal(err)
	}
	for i := 0; i < 5; i++ {
		_, err := queue.Pop()
		if err != io.EOF {
			t.Errorf("expect err == io.EOF, get %v", err)
		}
	}
}
// BenchmarkDiskQueue measures Push throughput while a background
// goroutine concurrently drains the queue.
//
// Fix over the original version: the on-disk queue directory
// testdata/d5 was never removed, leaking files after every run; it is
// now cleaned up like the other tests.
func BenchmarkDiskQueue(b *testing.B) {
	dirname := "testdata/d5"
	defer os.RemoveAll(dirname)
	queue, err := New(dirname)
	if err != nil {
		b.Fatal(err)
	}
	done := make(chan bool, 1)
	go func() {
		// NOTE(review): the consumer exits on the first io.EOF, which can
		// occur before the producer has pushed anything; either way the
		// benchmark still measures Push cost.
		for {
			if _, err := queue.Pop(); err != nil {
				if err == io.EOF {
					break
				}
			}
		}
		done <- true
	}()
	data := make([]byte, 768)
	rand.Read(data)
	for i := 0; i < b.N; i++ {
		queue.Push(data)
	}
	<-done
}

View File

@@ -0,0 +1,41 @@
# Bazel build rules for the pointwrite package and its test.
# tags = ["automanaged"]: maintained by generation tooling; manual
# edits may be overwritten.
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_test",
    "go_library",
)

go_test(
    name = "go_default_test",
    srcs = ["pointwrite_test.go"],
    embed = [":go_default_library"],
    tags = ["automanaged"],
    deps = ["//app/service/main/dapper/model:go_default_library"],
)

go_library(
    name = "go_default_library",
    srcs = ["pointwrite.go"],
    importpath = "go-common/app/service/main/dapper/pkg/pointwrite",
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
    deps = [
        "//app/service/main/dapper/model:go_default_library",
        "//library/log:go_default_library",
    ],
)

filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,139 @@
package pointwrite
import (
"context"
"fmt"
"strings"
"sync"
"time"
"go-common/app/service/main/dapper/model"
"go-common/library/log"
)
// WriteFn persists a batch of aggregated span points; an implementation
// is supplied to New and invoked on every flush.
type WriteFn func(ctx context.Context, points []*model.SpanPoint) error
// PointWriter consumes finished spans and aggregates them into sample
// points; Close flushes whatever is still buffered.
type PointWriter interface {
	WriteSpan(span *model.Span) error
	Close() error
}
// New returns a PointWriter that buckets spans by precision (seconds)
// and periodically flushes the aggregated points through fn, bounding
// each flush by timeout.
func New(fn WriteFn, precision int64, timeout time.Duration) PointWriter {
	w := &pointwriter{
		fn:        fn,
		precision: precision,
		timeout:   timeout,
		current:   make(map[string]*model.SpanPoint),
		// TODO make it configurable
		tk: time.NewTicker(time.Second * 30),
	}
	go w.start()
	return w
}
// pointwriter aggregates spans into per-interval SpanPoints and
// periodically flushes them through fn.
type pointwriter struct {
	closed    bool          // set by Close; checked at the top of WriteSpan
	rmx       sync.RWMutex  // guards current
	precision int64         // bucket width in seconds for timestamps
	timeout   time.Duration // per-flush deadline passed to fn's context
	current   map[string]*model.SpanPoint // in-flight buckets, keyed by time/service/operation/peer/kind
	fn        WriteFn       // sink that persists flushed points
	tk        *time.Ticker  // drives periodic flushes (hard-coded 30s in New)
}
// start flushes the accumulated points on every ticker tick; the loop
// ends when the ticker is stopped.
func (p *pointwriter) start() {
	for range p.tk.C {
		if err := p.flush(); err != nil {
			log.Error("flush pointwriter error: %s", err)
		}
	}
}
// flush atomically swaps out the current bucket map and writes its
// points through fn, bounded by the configured timeout.
func (p *pointwriter) flush() error {
	p.rmx.Lock()
	pending := p.current
	p.current = make(map[string]*model.SpanPoint)
	p.rmx.Unlock()
	if len(pending) == 0 {
		return nil
	}
	points := make([]*model.SpanPoint, 0, len(pending))
	for _, pt := range pending {
		points = append(points, pt)
	}
	ctx, cancel := context.WithTimeout(context.Background(), p.timeout)
	defer cancel()
	return p.fn(ctx, points)
}
// WriteSpan folds a finished span into the in-memory bucket keyed by
// (time bucket, service, operation, peer service, span kind); flush
// later ships these buckets as SpanPoints.
func (p *pointwriter) WriteSpan(span *model.Span) error {
	// NOTE(review): closed is read here without holding rmx while Close
	// writes it from another goroutine — technically a data race; confirm
	// callers stop writing before Close.
	if p.closed {
		return fmt.Errorf("pointwriter already closed")
	}
	kind := "client"
	if span.IsServer() {
		kind = "server"
	}
	// NOTE: ignore sample points from legacy-SDK client spans (their
	// service names contain no dot); DELETE this check in the future.
	if kind == "client" && !strings.Contains(span.ServiceName, ".") {
		return nil
	}
	peerService, ok := span.Tags["peer.service"].(string)
	if !ok {
		peerService = "unknown"
	}
	// truncate the start time down to the precision boundary so spans in
	// the same interval share one bucket
	timestamp := span.StartTime.Unix() - (span.StartTime.Unix() % p.precision)
	key := fmt.Sprintf("%d_%s_%s_%s_%s",
		timestamp,
		span.ServiceName,
		span.OperationName,
		peerService,
		kind,
	)
	p.rmx.Lock()
	defer p.rmx.Unlock()
	point, ok := p.current[key]
	if !ok {
		// first span for this bucket: seed it, recording this span as the
		// AvgDuration sample
		point = &model.SpanPoint{
			Timestamp:     timestamp,
			ServiceName:   span.ServiceName,
			OperationName: span.OperationName,
			PeerService:   peerService,
			SpanKind:      kind,
			AvgDuration:   model.SamplePoint{TraceID: span.TraceID, SpanID: span.SpanID, Value: int64(span.Duration)},
		}
		p.current[key] = point
	}
	duration := int64(span.Duration)
	// track the slowest sample seen in this bucket
	if duration > point.MaxDuration.Value {
		point.MaxDuration.TraceID = span.TraceID
		point.MaxDuration.SpanID = span.SpanID
		point.MaxDuration.Value = duration
	}
	// track the fastest sample (Value == 0 means "not set yet")
	if point.MinDuration.Value == 0 || duration < point.MinDuration.Value {
		point.MinDuration.TraceID = span.TraceID
		point.MinDuration.SpanID = span.SpanID
		point.MinDuration.Value = duration
	}
	// keep every errored span as an error sample
	if span.IsError() {
		point.Errors = append(point.Errors, model.SamplePoint{
			TraceID: span.TraceID,
			SpanID:  span.SpanID,
			Value:   duration,
		})
	}
	return nil
}
// Close stops the flush ticker, marks the writer closed and flushes any
// points still buffered.
func (p *pointwriter) Close() error {
	// NOTE(review): closed is set without synchronization while WriteSpan
	// reads it concurrently — confirm writers are stopped before Close.
	p.closed = true
	p.tk.Stop()
	return p.flush()
}

View File

@@ -0,0 +1,76 @@
package pointwrite
import (
"context"
"testing"
"time"
"go-common/app/service/main/dapper/model"
)
// TestPointWrite writes two spans that land in different 5-second
// buckets and checks that flush hands both points to the write fn.
func TestPointWrite(t *testing.T) {
	var collected []*model.SpanPoint
	sink := func(ctx context.Context, points []*model.SpanPoint) error {
		collected = append(collected, points...)
		return nil
	}
	pw := &pointwriter{
		fn:        sink,
		current:   make(map[string]*model.SpanPoint),
		precision: 5,
		timeout:   time.Second,
		tk:        time.NewTicker(time.Second * time.Duration(5)),
	}
	for _, span := range []*model.Span{
		{ServiceName: "test1", StartTime: time.Unix(100, 0)},
		{ServiceName: "test1", StartTime: time.Unix(110, 0)},
	} {
		if err := pw.WriteSpan(span); err != nil {
			t.Error(err)
		}
	}
	if len(pw.current) != 2 {
		t.Errorf("expect 2 point get %d", len(pw.current))
	}
	pw.flush()
	if len(collected) != 2 {
		t.Errorf("expect 2 point get %d", len(collected))
	}
}
// TestPointWriteFlush goes through the public New constructor and waits
// for the background ticker-driven flush to deliver the buffered points.
func TestPointWriteFlush(t *testing.T) {
	var data []*model.SpanPoint
	wait := make(chan bool, 1)
	mockFn := func(ctx context.Context, points []*model.SpanPoint) error {
		data = append(data, points...)
		wait <- true
		return nil
	}
	pw := New(mockFn, 1, time.Second)
	spans := []*model.Span{
		&model.Span{
			ServiceName: "test1",
			StartTime:   time.Unix(100, 0),
		},
		&model.Span{
			ServiceName: "test1",
			StartTime:   time.Unix(110, 0),
		},
	}
	for _, span := range spans {
		if err := pw.WriteSpan(span); err != nil {
			t.Error(err)
		}
	}
	// NOTE(review): New's flush ticker is hard-coded to 30s, so this
	// receive can block for up to 30 seconds — confirm that is intended.
	<-wait
	if len(data) != 2 {
		t.Errorf("expect 2 point get %d", len(data))
	}
}

View File

@@ -0,0 +1,29 @@
# Bazel build rules for the process package.
# tags = ["automanaged"]: maintained by generation tooling; manual
# edits may be overwritten.
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_library",
)

go_library(
    name = "go_default_library",
    srcs = ["process.go"],
    importpath = "go-common/app/service/main/dapper/pkg/process",
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
    deps = ["//app/service/main/dapper/model:go_default_library"],
)

filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,20 @@
package process
import (
"context"
"go-common/app/service/main/dapper/model"
)
// Processer handles one raw protobuf span; implementations decide what
// processing (validation, persistence, …) means.
type Processer interface {
	Process(ctx context.Context, protoSpan *model.ProtoSpan) error
}
// MockProcess adapts a plain function to the Processer interface,
// primarily for use in tests.
type MockProcess func(ctx context.Context, protoSpan *model.ProtoSpan) error

// Process implements Processer by delegating to the wrapped function.
func (m MockProcess) Process(ctx context.Context, protoSpan *model.ProtoSpan) error {
	return m(ctx, protoSpan)
}

View File

@@ -0,0 +1,28 @@
# Bazel build rules for the util package.
# tags = ["automanaged"]: maintained by generation tooling; manual
# edits may be overwritten.
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_library",
)

go_library(
    name = "go_default_library",
    srcs = ["handlersignal.go"],
    importpath = "go-common/app/service/main/dapper/pkg/util",
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)

filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,26 @@
package util
import (
"os"
"os/signal"
"syscall"
)
// HandlerExit blocks until SIGQUIT, SIGTERM or SIGINT arrives, then
// terminates the process with the exit code produced by exitFn.
func HandlerExit(exitFn func(s os.Signal) int) {
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT)
	sig := <-ch
	os.Exit(exitFn(sig))
}
// HandlerReload registers reload to be invoked from a background
// goroutine each time the process receives SIGHUP.
func HandlerReload(reload func(s os.Signal)) {
	go func() {
		ch := make(chan os.Signal, 1)
		signal.Notify(ch, syscall.SIGHUP)
		for sig := range ch {
			reload(sig)
		}
	}()
}

View File

@@ -0,0 +1,38 @@
# Bazel build rules for the udpcollect server package and its test.
# tags = ["automanaged"]: maintained by generation tooling; manual
# edits may be overwritten.
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_test",
    "go_library",
)

go_test(
    name = "go_default_test",
    srcs = ["udpcollect_test.go"],
    embed = [":go_default_library"],
    rundir = ".",
    tags = ["automanaged"],
)

go_library(
    name = "go_default_library",
    srcs = ["udpcollect.go"],
    importpath = "go-common/app/service/main/dapper/server/udpcollect",
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
    deps = ["//library/log:go_default_library"],
)

filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,156 @@
package udpcollect
import (
"fmt"
"net"
"net/url"
"os"
"path"
"strings"
"sync"
"time"
"go-common/library/log"
)
const (
	// _bufsize is the size of each pooled read buffer handed to
	// PacketConn.ReadFrom (32 KiB).
	_bufsize = 32 * 1024
)
// New builds a UDPCollect that will listen on addr (a unixgram or udp
// URL) with the given number of worker goroutines and hand every
// received payload to writeFn. A workers value of 0 defaults to 1.
func New(addr string, workers int, writeFn func(p []byte) error) (*UDPCollect, error) {
	if workers == 0 {
		workers = 1
	}
	u, err := url.Parse(addr)
	if err != nil {
		return nil, fmt.Errorf("parse addr error: %s", err)
	}
	c := &UDPCollect{
		addr:        u,
		writeFn:     writeFn,
		workers:     workers,
		readTimeout: 60 * time.Second,
		pool: sync.Pool{
			New: func() interface{} {
				return make([]byte, _bufsize)
			},
		},
	}
	return c, nil
}
// UDPCollect collects span data from a unixgram or UDP packet socket
// and forwards each received datagram to writeFn.
type UDPCollect struct {
	wg          sync.WaitGroup // tracks worker goroutines; Close waits on it
	workers     int            // number of concurrent reader goroutines
	addr        *url.URL       // listen address (unixgram path or udp host)
	writeFn     func(p []byte) error // receives each datagram payload
	readTimeout time.Duration  // deadline applied before each ReadFrom (60s)
	pool        sync.Pool      // recycles _bufsize read buffers
	closed      bool           // set by Close; checked in serve
	pconn       net.PacketConn // listening socket, set by Start
}
// Start opens the packet socket named by the collector's addr scheme
// (unixgram, udp, udp4 or udp6) and launches the worker goroutines
// that read from it.
func (u *UDPCollect) Start() error {
	var err error
	switch u.addr.Scheme {
	case "unixgram":
		u.pconn, err = listenUNIX(u.addr.Path)
	case "udp", "udp4", "udp6":
		u.pconn, err = listtenNet(u.addr.Scheme, u.addr.Host)
	default:
		return fmt.Errorf("unsupport network %s", u.addr.Scheme)
	}
	if err != nil {
		return fmt.Errorf("listen packet error: %s", err)
	}
	log.Info("dapper agent listen at: %s, workers: %d", u.addr, u.workers)
	// all workers share the single PacketConn; Close waits on wg
	u.wg.Add(u.workers)
	for i := 0; i < u.workers; i++ {
		go u.serve()
	}
	return nil
}
// listenUNIX listens on a unixgram socket at addr, creating the parent
// directory if needed and removing any stale socket file left over
// from a previous run.
func listenUNIX(addr string) (net.PacketConn, error) {
	dirname := path.Dir(addr)
	info, err := os.Stat(dirname)
	if err != nil {
		if !os.IsNotExist(err) {
			return nil, err
		}
		if err := os.MkdirAll(dirname, 0755); err != nil {
			return nil, fmt.Errorf("create directory %s error: %s", dirname, err)
		}
	}
	if err == nil && !info.IsDir() {
		return nil, fmt.Errorf("%s is already exists and not a directory", dirname)
	}
	if _, err := os.Stat(addr); err == nil {
		// remove old socket file so the fresh listen does not fail
		os.Remove(addr)
	}
	conn, err := net.ListenPacket("unixgram", addr)
	if err != nil {
		return nil, err
	}
	// make file permission 666 so non-Go clients (e.g. PHP) can write
	// spans to this socket
	return conn, os.Chmod(addr, 0666)
}
// listtenNet listens on a udp/udp4/udp6 packet socket.
// NOTE(review): the name carries a typo ("listten"); renaming it would
// also require updating the caller in Start.
func listtenNet(network, addr string) (net.PacketConn, error) {
	return net.ListenPacket(network, addr)
}
// serve is the worker loop: it keeps handling packets until the
// connection is reported closed after Close has been called.
func (u *UDPCollect) serve() {
	defer u.wg.Done()
	for {
		if err := u.handler(u.pconn); err != nil {
			// NOTE(review): shutdown is detected by substring-matching the
			// error text, and u.closed is read without synchronization with
			// Close — fragile; consider errors.Is(err, net.ErrClosed) and an
			// atomic flag.
			if strings.Contains(err.Error(), "closed") && u.closed {
				return
			}
			log.Error("handler PacketConn error: %s, retry after second", err)
			time.Sleep(time.Second)
		}
	}
}
func (u *UDPCollect) handler(pconn net.PacketConn) error {
p := u.buffer()
defer u.freeBuffer(p)
pconn.SetReadDeadline(time.Now().Add(u.readTimeout))
n, _, err := pconn.ReadFrom(p)
if n > 0 {
u.writeFn(p[:n])
}
if err == nil {
return nil
}
if netErr, ok := err.(net.Error); ok {
// ignore timeout and temporyary
if netErr.Timeout() || netErr.Temporary() {
return nil
}
}
return err
}
// buffer takes a _bufsize byte slice from the pool.
func (u *UDPCollect) buffer() []byte {
	return u.pool.Get().([]byte)
}

// freeBuffer returns a slice obtained from buffer back to the pool.
func (u *UDPCollect) freeBuffer(p []byte) {
	u.pool.Put(p)
}
// Close shuts the collector down: it closes the packet connection,
// waits for every worker to exit, and removes the socket file when
// listening on unixgram.
func (u *UDPCollect) Close() error {
	// NOTE(review): closed is written without synchronization while
	// workers read it in serve — a data race under the race detector.
	u.closed = true
	u.pconn.Close()
	// wait all workers exit
	u.wg.Wait()
	if u.addr.Scheme == "unixgram" {
		return os.Remove(u.addr.Path)
	}
	return nil
}

View File

@@ -0,0 +1,38 @@
package udpcollect
import (
"bytes"
"net"
"testing"
"time"
)
// TestUDPCollect verifies that datagrams written to the unixgram socket
// are delivered to the write callback exactly once each.
//
// Fix over the original version: the callback runs on two worker
// goroutines, so the plain `count++` was a data race; a buffered
// channel is used as the counter instead (channel ops are synchronized,
// and Close waits for all workers before the length is read).
func TestUDPCollect(t *testing.T) {
	data := []byte("hello world")
	hits := make(chan struct{}, 100)
	collect, err := New("unixgram:///tmp/test.sock", 2, func(p []byte) error {
		if !bytes.Equal(p, data) {
			t.Errorf("invalid p: %s", p)
		}
		hits <- struct{}{}
		return nil
	})
	if err != nil {
		t.Fatal(err)
	}
	if err := collect.Start(); err != nil {
		t.Fatal(err)
	}
	conn, err := net.DialTimeout("unixgram", "/tmp/test.sock", time.Second)
	if err != nil {
		t.Fatal(err)
	}
	for i := 0; i < 20; i++ {
		conn.Write(data)
	}
	time.Sleep(time.Second)
	// Close waits for every worker, so all callback sends happen-before
	// this read of the channel length.
	collect.Close()
	if count := len(hits); count != 20 {
		t.Errorf("wrong get %d != 20", count)
	}
}