Create & Init Project...

2019-04-22 18:49:16 +08:00
commit fc4fa37393
25440 changed files with 4054998 additions and 0 deletions


@@ -0,0 +1,25 @@
package(default_visibility = ["//visibility:public"])
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/main/dapper-query/api/v1:all-srcs",
"//app/service/main/dapper-query/cmd:all-srcs",
"//app/service/main/dapper-query/conf:all-srcs",
"//app/service/main/dapper-query/dao:all-srcs",
"//app/service/main/dapper-query/model:all-srcs",
"//app/service/main/dapper-query/pkg/cltclient:all-srcs",
"//app/service/main/dapper-query/pkg/opslog:all-srcs",
"//app/service/main/dapper-query/service:all-srcs",
"//app/service/main/dapper-query/util:all-srcs",
],
tags = ["automanaged"],
)


@@ -0,0 +1,22 @@
# dapper-query
#### v3.1.3
> Improve the operation_name latency ranking API
#### v3.1.2
> Expose all tags on the span list page
#### v3.1.1
> Fix a bug in span remarks (mark)
> Fix a panic when a trace has only one span and its span.kind is not server
> Support queries in the traceID:spanID form
#### v3.1.0
> Improve log search
> Add an API for querying a single service
> Add an API for querying raw spans
#### v3.0.0
> 1. Adapt to the new storage format introduced by the dapper refactor
#### v1.0.0
> 1. Split dapper-query out of dapper-service


@@ -0,0 +1,13 @@
# Owner
maojian
haoguanwei
weicheng
# Author
haoguanwei
zhoujixiang
weicheng
# Reviewer
maojian
haoguanwei


@@ -0,0 +1,21 @@
PROTO_FILE=api/v1/api.proto
VENDOR_DIR=$(shell echo $$GOPATH | cut -d':' -f1)/src/go-common/vendor
GO_FILES=$(shell find . -name "*.go" -type f)
rundev: build
./cmd/cmd -conf cmd/dapper-query-example.toml -http tcp://127.0.0.1:8000\?timeout=10s -http.perf tcp://127.0.0.1:6006 -log.v 10 -log.stdout
genapi: $(PROTO_FILE)
protoc -I. -I$(VENDOR_DIR) --gogo_out=plugins=grpc:. $<
protoc -I. -I$(VENDOR_DIR) --bm_out=logtostderr=1,jsonpb=true:. $<
protoc -I. -I$(VENDOR_DIR) --swagger_out=logtostderr=1:. $<
swagger-markdown -i api/v1/api.swagger.json -o api/v1/README.md
build: $(GO_FILES) genapi
go build -o cmd/cmd cmd/main.go
clean:
rm -rf bin/*
.PHONY: genapi build clean rundev


@@ -0,0 +1,18 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- haoguanwei
- maojian
- weicheng
- zhoujixiang
labels:
- main
- service
- service/main/dapper-query
options:
no_parent_owners: true
reviewers:
- haoguanwei
- maojian
- weicheng
- zhoujixiang


@@ -0,0 +1,4 @@
# dapper-query
### v1.0.0
> 1. Split dapper-query out of dapper-service to provide the dapper query API


@@ -0,0 +1,47 @@
load(
"@io_bazel_rules_go//proto:def.bzl",
"go_proto_library",
)
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_library(
name = "go_default_library",
srcs = [
"api.pb.bm.go",
"api.pb.go",
"marshal.go",
],
importpath = "go-common/app/service/main/dapper-query/api/v1",
tags = ["manual"],
visibility = ["//visibility:public"],
deps = [
"//app/tool/protoc-gen-bm/jsonpb:go_default_library",
"//library/net/http/blademaster:go_default_library",
"@com_github_gogo_protobuf//gogoproto:go_default_library",
"@com_github_gogo_protobuf//proto:go_default_library",
"@com_github_golang_protobuf//jsonpb:go_default_library",
"@go_googleapis//google/api:annotations_go_proto",
"@org_golang_google_grpc//:go_default_library",
"@org_golang_x_net//context:go_default_library",
],
)


@@ -0,0 +1,446 @@
api/v1/api.proto
================
**Version:** version not set
### /x/internal/dapper/clt-status
---
##### ***GET***
**Summary:** CltStatus returns collector status information
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1CltStatusReply](#v1cltstatusreply) |
### /x/internal/dapper/depends-rank
---
##### ***GET***
**Summary:** DependsRank ranks all dependencies under a given service_name:operation_name
**Parameters**
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ---- |
| service_name | query | | No | string |
| start | query | | No | string (int64) |
| end | query | | No | string (int64) |
| rank_type | query | Ranking type: max_duration (maximum latency), min_duration (minimum latency), avg_duration (average latency), errors (error count). | No | string |
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1DependsRankReply](#v1dependsrankreply) |
### /x/internal/dapper/depends-topology
---
##### ***GET***
**Summary:** DependsTopology returns the dependency topology graph
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1DependsTopologyReply](#v1dependstopologyreply) |
### /x/internal/dapper/list-span
---
##### ***GET***
**Summary:** ListSpan lists all sampled spans for a given service_name and operation_name
**Parameters**
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ---- |
| service_name | query | | No | string |
| operation_name | query | | No | string |
| start | query | | No | string (int64) |
| end | query | | No | string (int64) |
| order | query | Supported orders: time:desc / time:asc (sort by time), duration:desc / duration:asc (sort by duration). | No | string |
| only_error | query | Only return spans with errors. | No | boolean (boolean) |
| offset | query | | No | integer |
| limit | query | | No | integer |
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1ListSpanReply](#v1listspanreply) |
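For reference, a minimal Go client for this endpoint might look like the sketch below. The host/port and the service/operation names are placeholders (the port matches the [bm] address in cmd/dapper-query-example.toml later in this commit), not values defined by this API.

```go
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"net/url"
)

func main() {
	q := url.Values{}
	q.Set("service_name", "example.service")           // placeholder AppID
	q.Set("operation_name", "/x/internal/example/api") // placeholder operation
	q.Set("order", "duration:desc")
	q.Set("only_error", "false")
	q.Set("limit", "20")

	resp, err := http.Get("http://127.0.0.1:6193/x/internal/dapper/list-span?" + q.Encode())
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := ioutil.ReadAll(resp.Body)
	fmt.Println(string(body)) // JSON body shaped like v1ListSpanReply
}
```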
### /x/internal/dapper/operation-names
---
##### ***GET***
**Summary:** ListOperationName lists all operation_names of a service; only operation_names whose span.kind is server are returned
**Parameters**
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ---- |
| service_name | query | | No | string |
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1ListOperationNameReply](#v1listoperationnamereply) |
### /x/internal/dapper/operation-names-rank
---
##### ***GET***
**Summary:** OperationNameRank returns the operation_name ranking list
**Parameters**
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ---- |
| service_name | query | | No | string |
| start | query | | No | string (int64) |
| end | query | | No | string (int64) |
| rank_type | query | Ranking type: max_duration (maximum latency), min_duration (minimum latency), avg_duration (average latency), errors (error count). | No | string |
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1OperationNameRankReply](#v1operationnamerankreply) |
### /x/internal/dapper/ops-log
---
##### ***GET***
**Summary:** OpsLog fetches ops-log records by trace-id
If the requested trace-id was not recorded, provide service_name, operation_name, and a timestamp for a fuzzy query
**Parameters**
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ---- |
| trace_id | query | | No | string |
| span_id | query | | No | string |
| trace_field | query | | No | string |
| service_name | query | | No | string |
| operation_name | query | | No | string |
| start | query | Start time. | No | string (int64) |
| end | query | End time. | No | string (int64) |
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1OpsLogReply](#v1opslogreply) |
### /x/internal/dapper/raw-trace
---
##### ***GET***
**Summary:** RawTrace returns raw trace data
**Parameters**
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ---- |
| trace_id | query | | No | string |
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1RawTraceReply](#v1rawtracereply) |
### /x/internal/dapper/sample-point
---
##### ***GET***
**Summary:** SamplePoint returns sample point data
**Parameters**
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ---- |
| service_name | query | | No | string |
| operation_name | query | | No | string |
| only_error | query | only_error can be set to true for the errors chart. | No | boolean (boolean) |
| interval | query | Use the interval returned by span-series. | No | string (int64) |
| time | query | Use a time returned by span-series, in the same format, e.g. 2006-01-02T15:04:05. | No | string |
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1SamplePointReply](#v1samplepointreply) |
### /x/internal/dapper/service-depend
---
##### ***GET***
**Summary:** ServiceDepend queries a service's direct dependencies
TODO: computed in real time from the 3 most recently collected spans; results may be inaccurate when the queried service is unhealthy
**Parameters**
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ---- |
| service_name | query | service_name (self-explanatory). | No | string |
| operation_name | query | When operation_name is empty, all operation_names are queried and the results merged. | No | string |
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1ServiceDependReply](#v1servicedependreply) |
### /x/internal/dapper/service-names
---
##### ***GET***
**Summary:** ListServiceName lists all services
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1ListServiceNameReply](#v1listservicenamereply) |
### /x/internal/dapper/span-series
---
##### ***GET***
**Summary:** SpanSeries returns span time-series data
**Parameters**
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ---- |
| service_name | query | | No | string |
| operation_name | query | | No | string |
| start | query | | No | string (int64) |
| end | query | | No | string (int64) |
| fields | query | Available fields: max_duration, min_duration, avg_duration, errors; errors returns the total for the interval, the others return averages. fields is a comma-separated list, e.g. fields=max_duration,min_duration,avg_duration. | No | string |
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1SpanSeriesReply](#v1spanseriesreply) |
### /x/internal/dapper/trace
---
##### ***GET***
**Summary:** Trace queries a single trace
**Parameters**
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ---- |
| trace_id | query | | No | string |
| span_id | query | | No | string |
**Responses**
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | A successful response. | [v1TraceReply](#v1tracereply) |
### Models
---
### v1Client
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| addr | string | | No |
| err_count | string (int64) | | No |
| rate | string (int64) | | No |
| up_time | string (int64) | | No |
### v1CltNode
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| node | string | | No |
| queue_len | string (int64) | | No |
| clients | [ [v1Client](#v1client) ] | | No |
### v1CltStatusReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| nodes | [ [v1CltNode](#v1cltnode) ] | | No |
### v1DependsRankReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| rank_type | string | | No |
| items | [ [v1RankItem](#v1rankitem) ] | | No |
### v1DependsTopologyItem
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| service_name | string | | No |
| depend_on | string | | No |
### v1DependsTopologyReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| items | [ [v1DependsTopologyItem](#v1dependstopologyitem) ] | | No |
### v1Field
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| key | string | | No |
| value | string | | No |
### v1ListOperationNameReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| operation_names | [ string ] | | No |
### v1ListServiceNameReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| service_names | [ string ] | | No |
### v1ListSpanReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| items | [ [v1SpanListItem](#v1spanlistitem) ] | | No |
### v1Log
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| timestamp | string (int64) | | No |
| fields | [ [v1Field](#v1field) ] | | No |
### v1OperationNameRankReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| rank_type | string | | No |
| items | [ [v1RankItem](#v1rankitem) ] | | No |
### v1OpsLogRecord
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| time | string | | No |
| fields | object | | No |
| level | string | | No |
| message | string | | No |
### v1OpsLogReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| records | [ [v1OpsLogRecord](#v1opslogrecord) ] | | No |
### v1RankItem
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| service_name | string | | No |
| operation_name | string | | No |
| value | double | | No |
### v1RawTraceReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| items | [ [v1Span](#v1span) ] | | No |
### v1SamplePointItem
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| trace_id | string | | No |
| span_id | string | | No |
| duration | string (int64) | | No |
| is_error | boolean (boolean) | | No |
### v1SamplePointReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| items | [ [v1SamplePointItem](#v1samplepointitem) ] | | No |
### v1SeriesItem
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| field | string | | No |
| values | [ string (int64) ] | | No |
### v1ServiceDependItem
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| service_name | string | | No |
| component | string | | No |
| operation_names | [ string ] | | No |
### v1ServiceDependReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| items | [ [v1ServiceDependItem](#v1servicedependitem) ] | | No |
### v1Span
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| service_name | string | | No |
| operation_name | string | | No |
| trace_id | string | | No |
| span_id | string | | No |
| parent_id | string | | No |
| start_time | string (int64) | | No |
| duration | string (int64) | | No |
| tags | object | | No |
| logs | [ [v1Log](#v1log) ] | | No |
| level | integer | | No |
| childs | [ [v1Span](#v1span) ] | | No |
### v1SpanListItem
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| trace_id | string | | No |
| span_id | string | | No |
| parent_id | string | | No |
| service_name | string | | No |
| operation_name | string | | No |
| start_time | string | | No |
| duration | string | | No |
| tags | object | | No |
| is_error | boolean (boolean) | | No |
| container_ip | string | | No |
| region_zone | string | | No |
| mark | string | | No |
### v1SpanSeriesReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| interval | string (int64) | | No |
| times | [ string ] | | No |
| items | [ [v1SeriesItem](#v1seriesitem) ] | | No |
### v1TagValue
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| string_value | string | | No |
| int64_value | string (int64) | | No |
| bool_value | boolean (boolean) | | No |
| float_value | float | | No |
### v1TraceReply
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| service_count | integer | | No |
| span_count | integer | | No |
| max_level | integer | | No |
| root | [v1Span](#v1span) | | No |


@@ -0,0 +1,260 @@
// Package v1 Code generated by go-common/app/tool/protoc-gen-bm. DO NOT EDIT.
package v1
import (
"bytes"
"context"
"encoding/json"
"go-common/app/tool/protoc-gen-bm/jsonpb"
bm "go-common/library/net/http/blademaster"
)
// BMDapperQueryServer is an interface that mirrors the gRPC server definition
type BMDapperQueryServer interface {
ListServiceName(context.Context, *ListServiceNameReq) (*ListServiceNameReply, error)
ListOperationName(context.Context, *ListOperationNameReq) (*ListOperationNameReply, error)
ListSpan(context.Context, *ListSpanReq) (*ListSpanReply, error)
Trace(context.Context, *TraceReq) (*TraceReply, error)
RawTrace(context.Context, *RawTraceReq) (*RawTraceReply, error)
OperationNameRank(context.Context, *OperationNameRankReq) (*OperationNameRankReply, error)
DependsRank(context.Context, *DependsRankReq) (*DependsRankReply, error)
SpanSeries(context.Context, *SpanSeriesReq) (*SpanSeriesReply, error)
SamplePoint(context.Context, *SamplePointReq) (*SamplePointReply, error)
CltStatus(context.Context, *CltStatusReq) (*CltStatusReply, error)
DependsTopology(context.Context, *DependsTopologyReq) (*DependsTopologyReply, error)
OpsLog(context.Context, *OpsLogReq) (*OpsLogReply, error)
ServiceDepend(context.Context, *ServiceDependReq) (*ServiceDependReply, error)
}
// _BMServerDapperQuery wraps a BMDapperQueryServer implementation with blademaster handlers
type _BMServerDapperQuery struct {
BMDapperQueryServer
}
func (b *_BMServerDapperQuery) bmDapperQueryListServiceNameHandler(c *bm.Context) {
req := new(ListServiceNameReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.ListServiceName(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQueryListOperationNameHandler(c *bm.Context) {
req := new(ListOperationNameReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.ListOperationName(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQueryListSpanHandler(c *bm.Context) {
req := new(ListSpanReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.ListSpan(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQueryTraceHandler(c *bm.Context) {
req := new(TraceReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.Trace(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQueryRawTraceHandler(c *bm.Context) {
req := new(RawTraceReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.RawTrace(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQueryOperationNameRankHandler(c *bm.Context) {
req := new(OperationNameRankReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.OperationNameRank(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQueryDependsRankHandler(c *bm.Context) {
req := new(DependsRankReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.DependsRank(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQuerySpanSeriesHandler(c *bm.Context) {
req := new(SpanSeriesReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.SpanSeries(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQuerySamplePointHandler(c *bm.Context) {
req := new(SamplePointReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.SamplePoint(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQueryCltStatusHandler(c *bm.Context) {
req := new(CltStatusReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.CltStatus(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQueryDependsTopologyHandler(c *bm.Context) {
req := new(DependsTopologyReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.DependsTopology(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQueryOpsLogHandler(c *bm.Context) {
req := new(OpsLogReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.OpsLog(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
func (b *_BMServerDapperQuery) bmDapperQueryServiceDependHandler(c *bm.Context) {
req := new(ServiceDependReq)
if err := c.Bind(req); err != nil {
return
}
reply, err := b.ServiceDepend(c.Context, req)
if err != nil {
c.JSON(nil, err)
return
}
body := &bytes.Buffer{}
marshaler := jsonpb.Marshaler{EmitDefaults: true, OrigName: true}
err = marshaler.Marshal(body, reply)
c.JSON(json.RawMessage(body.Bytes()), err)
}
// RegisterDapperQueryBMServer registers the DapperQuery routes on a bm engine
func RegisterDapperQueryBMServer(e *bm.Engine, s BMDapperQueryServer) {
bs := &_BMServerDapperQuery{s}
e.GET("/x/internal/dapper/service-names", bs.bmDapperQueryListServiceNameHandler)
e.GET("/x/internal/dapper/operation-names", bs.bmDapperQueryListOperationNameHandler)
e.GET("/x/internal/dapper/list-span", bs.bmDapperQueryListSpanHandler)
e.GET("/x/internal/dapper/trace", bs.bmDapperQueryTraceHandler)
e.GET("/x/internal/dapper/raw-trace", bs.bmDapperQueryRawTraceHandler)
e.GET("/x/internal/dapper/operation-names-rank", bs.bmDapperQueryOperationNameRankHandler)
e.GET("/x/internal/dapper/depends-rank", bs.bmDapperQueryDependsRankHandler)
e.GET("/x/internal/dapper/span-series", bs.bmDapperQuerySpanSeriesHandler)
e.GET("/x/internal/dapper/sample-point", bs.bmDapperQuerySamplePointHandler)
e.GET("/x/internal/dapper/clt-status", bs.bmDapperQueryCltStatusHandler)
e.GET("/x/internal/dapper/depends-topology", bs.bmDapperQueryDependsTopologyHandler)
e.GET("/x/internal/dapper/ops-log", bs.bmDapperQueryOpsLogHandler)
e.GET("/x/internal/dapper/service-depend", bs.bmDapperQueryServiceDependHandler)
}

File diff suppressed because it is too large


@@ -0,0 +1,348 @@
syntax = "proto3";
import "google/api/annotations.proto";
import "github.com/gogo/protobuf/gogoproto/gogo.proto";
package dapper.query.v1;
option go_package = "v1";
message ListServiceNameReq {}
message ListServiceNameReply {
repeated string service_names = 1;
}
message ListOperationNameReq {
string service_name = 1 [(gogoproto.moretags) = "form:\"service_name\" validate:\"required\""];
}
message ListOperationNameReply {
repeated string operation_names = 1;
}
message ListSpanReq {
string service_name = 1 [(gogoproto.moretags) = "form:\"service_name\" validate:\"required\""];
string operation_name = 2 [(gogoproto.moretags) = "form:\"operation_name\" validate:\"required\""];
int64 start = 3 [(gogoproto.moretags) = "form:\"start\""];
int64 end = 4 [(gogoproto.moretags) = "form:\"end\""];
// Supported orders:
// time:desc / time:asc: sort by time
// duration:desc / duration:asc: sort by duration
string order = 5 [(gogoproto.moretags) = "form:\"order\""];
// only return spans with errors
bool only_error = 6 [(gogoproto.moretags) = "form:\"only_error\""];
int32 offset = 7 [(gogoproto.moretags) = "form:\"offset\""];
int32 limit = 8 [(gogoproto.moretags) = "form:\"limit\""];
}
message SpanListItem {
string trace_id = 1;
string span_id = 2;
string parent_id = 3;
string service_name = 4;
string operation_name = 5;
string start_time = 6; // span start time
string duration = 7; // span duration
map<string, TagValue> tags = 12;
// Deprecated: use tags
bool is_error = 8; // whether an error occurred
string container_ip = 9;
string region_zone = 10;
string mark = 11;
}
message TraceReq {
string trace_id = 1 [(gogoproto.moretags) = "form:\"trace_id\" validate:\"required\""];
string span_id = 2 [(gogoproto.moretags) = "form:\"span_id\""];
}
message RawTraceReq {
string trace_id = 1 [(gogoproto.moretags) = "form:\"trace_id\" validate:\"required\""];
};
message RawTraceReply {
repeated Span items = 1;
}
message TagValue {
oneof value {
string string_value = 2;
int64 int64_value = 3;
bool bool_value = 4;
float float_value = 5;
}
}
message Field {
string key = 1;
string value = 2;
}
message Log {
int64 timestamp = 1;
repeated Field fields = 2;
}
message Span {
string service_name = 1;
string operation_name = 2;
string trace_id = 3;
string span_id = 4;
string parent_id = 5;
int64 start_time = 6;
int64 duration = 7;
map<string, TagValue> tags = 8;
repeated Log logs = 9;
int32 level = 10;
repeated Span childs = 11;
}
message TraceReply {
int32 service_count = 1;
int32 span_count = 2;
int32 max_level = 3;
Span root = 4;
}
message ListSpanReply {
repeated SpanListItem items = 2;
}
message OperationNameRankReq {
string service_name = 1 [(gogoproto.moretags) = "form:\"service_name\" validate:\"required\""];
int64 start = 2 [(gogoproto.moretags) = "form:\"start\""];
int64 end = 3 [(gogoproto.moretags) = "form:\"end\""];
// ranking type: max_duration (maximum latency), min_duration (minimum latency), avg_duration (average latency), errors (error count)
string rank_type = 4 [(gogoproto.moretags) = "form:\"rank_type\""];
}
message RankItem {
string service_name = 1;
string operation_name = 2;
// when rank_type is max_duration, min_duration or avg_duration, value is in nanoseconds
// when rank_type is errors, value is the error count
double value = 3;
}
message OperationNameRankReply {
string rank_type = 1;
repeated RankItem items = 2;
}
message DependsRankReq {
string service_name = 1 [(gogoproto.moretags) = "form:\"service_name\" validate:\"required\""];
int64 start = 3 [(gogoproto.moretags) = "form:\"start\""];
int64 end = 4 [(gogoproto.moretags) = "form:\"end\""];
// ranking type: max_duration (maximum latency), min_duration (minimum latency), avg_duration (average latency), errors (error count)
string rank_type = 5 [(gogoproto.moretags) = "form:\"rank_type\""];
}
message DependsRankReply {
string rank_type = 1;
repeated RankItem items = 2;
}
message SpanSeriesReq {
string service_name = 1 [(gogoproto.moretags) = "form:\"service_name\" validate:\"required\""];
string operation_name = 2 [(gogoproto.moretags) = "form:\"operation_name\" validate:\"required\""];
int64 start = 3 [(gogoproto.moretags) = "form:\"start\""];
int64 end = 4 [(gogoproto.moretags) = "form:\"end\""];
// available fields: max_duration, min_duration, avg_duration, errors;
// errors returns the total for the interval, the others return averages
// fields is a comma-separated list, e.g. fields=max_duration,min_duration,avg_duration
string fields = 6 [(gogoproto.moretags) = "form:\"fields\""];
}
message SeriesItem {
// the name is usually the requested field plus {count|mean}
string field = 1;
repeated int64 values = 2 [(gogoproto.customtype) = "*int64"];
}
message SpanSeriesReply {
// interval between returned points
int64 interval = 5;
repeated string times = 1;
repeated SeriesItem items = 2;
}
message CltStatusReq {}
message Client {
string addr = 1;
int64 err_count = 2;
int64 rate = 3;
int64 up_time = 4;
}
message CltNode {
string node = 1;
int64 queue_len = 2;
repeated Client clients = 3;
}
message CltStatusReply {
repeated CltNode nodes = 1;
}
message SamplePointReq {
string service_name = 1 [(gogoproto.moretags) = "form:\"service_name\" validate:\"required\""];
string operation_name = 2 [(gogoproto.moretags) = "form:\"operation_name\" validate:\"required\""];
// only_error can be set to true for the errors chart
bool only_error = 3 [(gogoproto.moretags) = "form:\"only_error\""];
// use the interval returned by span-series
int64 interval = 5 [(gogoproto.moretags) = "form:\"interval\" validate:\"required\""];
// use a time returned by span-series, in the same format, e.g. 2006-01-02T15:04:05
string time = 6 [(gogoproto.moretags) = "form:\"time\" validate:\"required\""];
}
message SamplePointItem {
string trace_id = 1;
string span_id = 2;
int64 duration = 3;
bool is_error = 4;
}
message SamplePointReply {
repeated SamplePointItem items = 1;
}
message DependsTopologyReq {}
message DependsTopologyItem {
string service_name = 1;
string depend_on = 2;
}
message DependsTopologyReply {
repeated DependsTopologyItem items = 1;
}
message OpsLogReq {
string trace_id = 1 [(gogoproto.moretags) = "form:\"trace_id\" validate:\"required\""];
string span_id = 2 [(gogoproto.moretags) = "form:\"span_id\""];
string trace_field = 3 [(gogoproto.moretags) = "form:\"trace_field\""];
string service_name = 4 [(gogoproto.moretags) = "form:\"service_name\""];
string operation_name = 5 [(gogoproto.moretags) = "form:\"operation_name\""];
// start time
int64 start = 6 [(gogoproto.moretags) = "form:\"start\""];
// end time
int64 end = 7[(gogoproto.moretags) = "form:\"end\""];
}
message OpsLogRecord {
string time = 1;
map<string, TagValue> fields = 2;
string level = 3;
string message = 4;
}
message OpsLogReply {
repeated OpsLogRecord records = 1;
}
message ServiceDependReq {
// service_name (self-explanatory)
string service_name = 1 [(gogoproto.moretags) = "form:\"service_name\" validate:\"required\""];
// when operation_name is empty, query all operation_names and merge the results
string operation_name = 2 [(gogoproto.moretags) = "form:\"operation_name\""];
}
message ServiceDependReply {
repeated ServiceDependItem items = 1;
}
message ServiceDependItem {
// service_name: name of the dependency; for services this is the AppID, otherwise a component name such as mysql, redis, http
string service_name = 1;
// component: communication component, e.g. net/http, goRPC, gRPC
string component = 2;
// operation_names: operation_names of the depended-on service; empty for mysql, redis, etc.
repeated string operation_names = 3;
}
// DapperQuery is the dapper query service
service DapperQuery {
// ListServiceName lists all services
rpc ListServiceName(ListServiceNameReq) returns (ListServiceNameReply) {
option (google.api.http) = {
get: "/x/internal/dapper/service-names";
};
}
// ListOperationName lists all operation_names of a service; only operation_names whose span.kind is server are returned
rpc ListOperationName(ListOperationNameReq) returns (ListOperationNameReply) {
option (google.api.http) = {
get: "/x/internal/dapper/operation-names";
};
}
// ListSpan lists all sampled spans for a given service_name and operation_name
rpc ListSpan(ListSpanReq) returns (ListSpanReply) {
option (google.api.http) = {
get: "/x/internal/dapper/list-span";
};
}
// Trace queries a single trace
rpc Trace(TraceReq) returns (TraceReply) {
option (google.api.http) = {
get: "/x/internal/dapper/trace";
};
}
// RawTrace returns raw trace data
rpc RawTrace(RawTraceReq) returns (RawTraceReply) {
option (google.api.http) = {
get: "/x/internal/dapper/raw-trace";
};
}
// OperationNameRank returns the operation_name ranking list
rpc OperationNameRank(OperationNameRankReq) returns(OperationNameRankReply) {
option (google.api.http) = {
get: "/x/internal/dapper/operation-names-rank";
};
}
// DependsRank ranks all dependencies under a given service_name:operation_name
rpc DependsRank(DependsRankReq) returns(DependsRankReply) {
option (google.api.http) = {
get: "/x/internal/dapper/depends-rank";
};
}
// SpanSeries returns span time-series data
rpc SpanSeries(SpanSeriesReq) returns (SpanSeriesReply) {
option (google.api.http) = {
get: "/x/internal/dapper/span-series";
};
}
// SamplePoint returns sample point data
rpc SamplePoint(SamplePointReq) returns(SamplePointReply) {
option (google.api.http) = {
get: "/x/internal/dapper/sample-point";
};
}
// CltStatus returns collector status information
rpc CltStatus(CltStatusReq) returns(CltStatusReply) {
option (google.api.http) = {
get: "/x/internal/dapper/clt-status";
};
}
// DependsTopology returns the dependency topology graph
rpc DependsTopology(DependsTopologyReq) returns (DependsTopologyReply) {
option (google.api.http) = {
get: "/x/internal/dapper/depends-topology";
};
}
// OpsLog fetches ops-log records by trace-id.
// If the requested trace-id was not recorded, provide service_name, operation_name and a timestamp for a fuzzy query.
rpc OpsLog(OpsLogReq) returns (OpsLogReply) {
option (google.api.http) = {
get: "/x/internal/dapper/ops-log";
};
}
// ServiceDepend queries a service's direct dependencies.
// TODO: computed in real time from the 3 most recently collected spans; results may be inaccurate when the queried service is unhealthy.
rpc ServiceDepend(ServiceDependReq) returns (ServiceDependReply) {
option (google.api.http) = {
get: "/x/internal/dapper/service-depend";
};
}
}
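
The generated api.pb.go (suppressed above as too large) also carries the usual gRPC stubs for this service. Assuming the standard gogo/protobuf + grpc-go codegen naming — NewDapperQueryClient and the ServiceNames field are assumptions based on that convention, not something visible in this diff — a minimal client sketch would be:

```go
package main

import (
	"context"
	"fmt"
	"time"

	"google.golang.org/grpc"

	v1 "go-common/app/service/main/dapper-query/api/v1"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()

	// The gRPC listen address is not part of this commit; 127.0.0.1:9000 is a placeholder.
	conn, err := grpc.DialContext(ctx, "127.0.0.1:9000", grpc.WithInsecure(), grpc.WithBlock())
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	// NewDapperQueryClient is the constructor conventionally generated for `service DapperQuery`.
	cli := v1.NewDapperQueryClient(conn)
	reply, err := cli.ListServiceName(ctx, &v1.ListServiceNameReq{})
	if err != nil {
		panic(err)
	}
	// ServiceNames follows the usual service_names -> ServiceNames field mapping.
	fmt.Println(reply.ServiceNames)
}
```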


@@ -0,0 +1,974 @@
{
"swagger": "2.0",
"info": {
"title": "api/v1/api.proto",
"version": "version not set"
},
"schemes": [
"http",
"https"
],
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"paths": {
"/x/internal/dapper/clt-status": {
"get": {
"summary": "CltStatus 获取 collector 信息",
"operationId": "CltStatus",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1CltStatusReply"
}
}
},
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/depends-rank": {
"get": {
"summary": "DependsRank 查询某一个 service_name:operation_name 下所有依赖组件排名",
"operationId": "DependsRank",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1DependsRankReply"
}
}
},
"parameters": [
{
"name": "service_name",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "start",
"in": "query",
"required": false,
"type": "string",
"format": "int64"
},
{
"name": "end",
"in": "query",
"required": false,
"type": "string",
"format": "int64"
},
{
"name": "rank_type",
"description": "排序类型 max_duration 最大耗时, min_duration 最小耗时, avg_duration 平均耗时, errors 错误数.",
"in": "query",
"required": false,
"type": "string"
}
],
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/depends-topology": {
"get": {
"summary": "DependsTopology 获取依赖拓扑图",
"operationId": "DependsTopology",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1DependsTopologyReply"
}
}
},
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/list-span": {
"get": {
"summary": "ListSpan 列出一个 service_name 某一 operation_name 所有采样到 Span",
"operationId": "ListSpan",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1ListSpanReply"
}
}
},
"parameters": [
{
"name": "service_name",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "operation_name",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "start",
"in": "query",
"required": false,
"type": "string",
"format": "int64"
},
{
"name": "end",
"in": "query",
"required": false,
"type": "string",
"format": "int64"
},
{
"name": "order",
"description": "目前支持的 order \ntime:desc time:asc 按时间排序\nduration:desc duration:asc 按耗时排序.",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "only_error",
"description": "只显示 error 的 span.",
"in": "query",
"required": false,
"type": "boolean",
"format": "boolean"
},
{
"name": "offset",
"in": "query",
"required": false,
"type": "integer",
"format": "int32"
},
{
"name": "limit",
"in": "query",
"required": false,
"type": "integer",
"format": "int32"
}
],
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/operation-names": {
"get": {
"summary": "ListOperationName 列出某一 service 下所有 operation_name 仅 span.kind 为 server 的 operation_name",
"operationId": "ListOperationName",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1ListOperationNameReply"
}
}
},
"parameters": [
{
"name": "service_name",
"in": "query",
"required": false,
"type": "string"
}
],
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/operation-names-rank": {
"get": {
"summary": "OperationNameRank 查询 OperationName 排名列表",
"operationId": "OperationNameRank",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1OperationNameRankReply"
}
}
},
"parameters": [
{
"name": "service_name",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "start",
"in": "query",
"required": false,
"type": "string",
"format": "int64"
},
{
"name": "end",
"in": "query",
"required": false,
"type": "string",
"format": "int64"
},
{
"name": "rank_type",
"description": "排序类型 max_duration 最大耗时, min_duration 最小耗时, avg_duration 平均耗时, errors 错误数.",
"in": "query",
"required": false,
"type": "string"
}
],
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/ops-log": {
"get": {
"summary": "OpsLog 获取 通过 trace-id 获取 opslog 记录\n如果请求的 trace-id 没有被记录到, 则需要提供 service_name operation_name 和 timestamp 进行模糊查询",
"operationId": "OpsLog",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1OpsLogReply"
}
}
},
"parameters": [
{
"name": "trace_id",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "span_id",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "trace_field",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "service_name",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "operation_name",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "start",
"description": "开始时间.",
"in": "query",
"required": false,
"type": "string",
"format": "int64"
},
{
"name": "end",
"description": "结束时间.",
"in": "query",
"required": false,
"type": "string",
"format": "int64"
}
],
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/raw-trace": {
"get": {
"summary": "RawTrace 原始 Trace 数据",
"operationId": "RawTrace",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1RawTraceReply"
}
}
},
"parameters": [
{
"name": "trace_id",
"in": "query",
"required": false,
"type": "string"
}
],
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/sample-point": {
"get": {
"summary": "SamplePoint 获取采样点数据",
"operationId": "SamplePoint",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1SamplePointReply"
}
}
},
"parameters": [
{
"name": "service_name",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "operation_name",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "only_error",
"description": "only_error 在 errors 那个图可以指定为 true.",
"in": "query",
"required": false,
"type": "boolean",
"format": "boolean"
},
{
"name": "interval",
"description": "interval 使用 span-series 返回的 interval 即可.",
"in": "query",
"required": false,
"type": "string",
"format": "int64"
},
{
"name": "time",
"description": "time 使用 time-series 返回的时间即可,相同格式型如 2006-01-02T15:04:05.",
"in": "query",
"required": false,
"type": "string"
}
],
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/service-depend": {
"get": {
"summary": "ServiceDepend 查询服务的直接依赖\nTODO: 通过最近收集的到3 个 span 实时计算的,在当前查询的服务出现不正常的时候,查询结果可能不准确",
"operationId": "ServiceDepend",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1ServiceDependReply"
}
}
},
"parameters": [
{
"name": "service_name",
"description": "service_name 不解释!.",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "operation_name",
"description": "operation_name 当 operation_name 为空时查询所有 operation_name 然后 merge 结果.",
"in": "query",
"required": false,
"type": "string"
}
],
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/service-names": {
"get": {
"summary": "ListServiceName 列出所有 service",
"operationId": "ListServiceName",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1ListServiceNameReply"
}
}
},
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/span-series": {
"get": {
"summary": "SpanSeries 获取 span 的时间序列数据",
"operationId": "SpanSeries",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1SpanSeriesReply"
}
}
},
"parameters": [
{
"name": "service_name",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "operation_name",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "start",
"in": "query",
"required": false,
"type": "string",
"format": "int64"
},
{
"name": "end",
"in": "query",
"required": false,
"type": "string",
"format": "int64"
},
{
"name": "fields",
"description": "可选的 fields 有 max_duration, min_duration, avg_duration, errors\n其中除 errors 返回的是一段时间内的总数 其他返回的都是平均数\nfields 是个数组可以通过 fields=max_duration,min_duration,avg_duration 逗号分隔.",
"in": "query",
"required": false,
"type": "string"
}
],
"tags": [
"DapperQuery"
]
}
},
"/x/internal/dapper/trace": {
"get": {
"summary": "Trace 查询一个 Trace",
"operationId": "Trace",
"responses": {
"200": {
"description": "A successful response.",
"schema": {
"$ref": "#/definitions/v1TraceReply"
}
}
},
"parameters": [
{
"name": "trace_id",
"in": "query",
"required": false,
"type": "string"
},
{
"name": "span_id",
"in": "query",
"required": false,
"type": "string"
}
],
"tags": [
"DapperQuery"
]
}
}
},
"definitions": {
"v1Client": {
"type": "object",
"properties": {
"addr": {
"type": "string"
},
"err_count": {
"type": "string",
"format": "int64"
},
"rate": {
"type": "string",
"format": "int64"
},
"up_time": {
"type": "string",
"format": "int64"
}
}
},
"v1CltNode": {
"type": "object",
"properties": {
"node": {
"type": "string"
},
"queue_len": {
"type": "string",
"format": "int64"
},
"clients": {
"type": "array",
"items": {
"$ref": "#/definitions/v1Client"
}
}
}
},
"v1CltStatusReply": {
"type": "object",
"properties": {
"nodes": {
"type": "array",
"items": {
"$ref": "#/definitions/v1CltNode"
}
}
}
},
"v1DependsRankReply": {
"type": "object",
"properties": {
"rank_type": {
"type": "string"
},
"items": {
"type": "array",
"items": {
"$ref": "#/definitions/v1RankItem"
}
}
}
},
"v1DependsTopologyItem": {
"type": "object",
"properties": {
"service_name": {
"type": "string"
},
"depend_on": {
"type": "string"
}
}
},
"v1DependsTopologyReply": {
"type": "object",
"properties": {
"items": {
"type": "array",
"items": {
"$ref": "#/definitions/v1DependsTopologyItem"
}
}
}
},
"v1Field": {
"type": "object",
"properties": {
"key": {
"type": "string"
},
"value": {
"type": "string"
}
}
},
"v1ListOperationNameReply": {
"type": "object",
"properties": {
"operation_names": {
"type": "array",
"items": {
"type": "string"
}
}
}
},
"v1ListServiceNameReply": {
"type": "object",
"properties": {
"service_names": {
"type": "array",
"items": {
"type": "string"
}
}
}
},
"v1ListSpanReply": {
"type": "object",
"properties": {
"items": {
"type": "array",
"items": {
"$ref": "#/definitions/v1SpanListItem"
}
}
}
},
"v1Log": {
"type": "object",
"properties": {
"timestamp": {
"type": "string",
"format": "int64"
},
"fields": {
"type": "array",
"items": {
"$ref": "#/definitions/v1Field"
}
}
}
},
"v1OperationNameRankReply": {
"type": "object",
"properties": {
"rank_type": {
"type": "string"
},
"items": {
"type": "array",
"items": {
"$ref": "#/definitions/v1RankItem"
}
}
}
},
"v1OpsLogRecord": {
"type": "object",
"properties": {
"time": {
"type": "string"
},
"fields": {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/v1TagValue"
}
},
"level": {
"type": "string"
},
"message": {
"type": "string"
}
}
},
"v1OpsLogReply": {
"type": "object",
"properties": {
"records": {
"type": "array",
"items": {
"$ref": "#/definitions/v1OpsLogRecord"
}
}
}
},
"v1RankItem": {
"type": "object",
"properties": {
"service_name": {
"type": "string"
},
"operation_name": {
"type": "string"
},
"value": {
"type": "number",
"format": "double",
"title": "当 rank type 是 max_duration, min_duration, avg_duration 时 value 是纳秒\n当 rank type 是 errors 是 value 是错误数"
}
}
},
"v1RawTraceReply": {
"type": "object",
"properties": {
"items": {
"type": "array",
"items": {
"$ref": "#/definitions/v1Span"
}
}
}
},
"v1SamplePointItem": {
"type": "object",
"properties": {
"trace_id": {
"type": "string"
},
"span_id": {
"type": "string"
},
"duration": {
"type": "string",
"format": "int64"
},
"is_error": {
"type": "boolean",
"format": "boolean"
}
}
},
"v1SamplePointReply": {
"type": "object",
"properties": {
"items": {
"type": "array",
"items": {
"$ref": "#/definitions/v1SamplePointItem"
}
}
}
},
"v1SeriesItem": {
"type": "object",
"properties": {
"field": {
"type": "string",
"title": "名称一般是请求的 field+{count|mean}"
},
"values": {
"type": "array",
"items": {
"type": "string",
"format": "int64"
}
}
}
},
"v1ServiceDependItem": {
"type": "object",
"properties": {
"service_name": {
"type": "string",
"title": "service_name 依赖服务名称, service 为 AppID 其他为组件名 mysql, redis, http 等"
},
"component": {
"type": "string",
"title": "component, 通讯组件 e.g. net/http, goRPC, gRPC"
},
"operation_names": {
"type": "array",
"items": {
"type": "string"
},
"title": "operation_names 被依赖服务的 operation_names, mysql, redis 等为空"
}
}
},
"v1ServiceDependReply": {
"type": "object",
"properties": {
"items": {
"type": "array",
"items": {
"$ref": "#/definitions/v1ServiceDependItem"
}
}
}
},
"v1Span": {
"type": "object",
"properties": {
"service_name": {
"type": "string"
},
"operation_name": {
"type": "string"
},
"trace_id": {
"type": "string"
},
"span_id": {
"type": "string"
},
"parent_id": {
"type": "string"
},
"start_time": {
"type": "string",
"format": "int64"
},
"duration": {
"type": "string",
"format": "int64"
},
"tags": {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/v1TagValue"
}
},
"logs": {
"type": "array",
"items": {
"$ref": "#/definitions/v1Log"
}
},
"level": {
"type": "integer",
"format": "int32"
},
"childs": {
"type": "array",
"items": {
"$ref": "#/definitions/v1Span"
}
}
}
},
"v1SpanListItem": {
"type": "object",
"properties": {
"trace_id": {
"type": "string"
},
"span_id": {
"type": "string"
},
"parent_id": {
"type": "string"
},
"service_name": {
"type": "string"
},
"operation_name": {
"type": "string"
},
"start_time": {
"type": "string"
},
"duration": {
"type": "string"
},
"tags": {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/v1TagValue"
}
},
"is_error": {
"type": "boolean",
"format": "boolean",
"title": "Deprecated: use tags"
},
"container_ip": {
"type": "string"
},
"region_zone": {
"type": "string"
},
"mark": {
"type": "string"
}
}
},
"v1SpanSeriesReply": {
"type": "object",
"properties": {
"interval": {
"type": "string",
"format": "int64",
"title": "返回点的间隔"
},
"times": {
"type": "array",
"items": {
"type": "string"
}
},
"items": {
"type": "array",
"items": {
"$ref": "#/definitions/v1SeriesItem"
}
}
}
},
"v1TagValue": {
"type": "object",
"properties": {
"string_value": {
"type": "string"
},
"int64_value": {
"type": "string",
"format": "int64"
},
"bool_value": {
"type": "boolean",
"format": "boolean"
},
"float_value": {
"type": "number",
"format": "float"
}
}
},
"v1TraceReply": {
"type": "object",
"properties": {
"service_count": {
"type": "integer",
"format": "int32"
},
"span_count": {
"type": "integer",
"format": "int32"
},
"max_level": {
"type": "integer",
"format": "int32"
},
"root": {
"$ref": "#/definitions/v1Span"
}
}
}
}
}


@@ -0,0 +1,57 @@
package v1
import (
"encoding/json"
"go-common/app/tool/protoc-gen-bm/jsonpb"
)
// MarshalJSON .
func (t *TagValue) MarshalJSON() ([]byte, error) {
return json.Marshal(t.Value)
}
// MarshalJSON .
func (t *TagValue_StringValue) MarshalJSON() ([]byte, error) {
return json.Marshal(t.StringValue)
}
// MarshalJSON .
func (t *TagValue_Int64Value) MarshalJSON() ([]byte, error) {
return json.Marshal(t.Int64Value)
}
// MarshalJSON .
func (t *TagValue_BoolValue) MarshalJSON() ([]byte, error) {
return json.Marshal(t.BoolValue)
}
// MarshalJSON .
func (t *TagValue_FloatValue) MarshalJSON() ([]byte, error) {
return json.Marshal(t.FloatValue)
}
// MarshalJSONPB .
func (t *TagValue) MarshalJSONPB(*jsonpb.Marshaler) ([]byte, error) {
return json.Marshal(t.Value)
}
// MarshalJSONPB .
func (t *TagValue_StringValue) MarshalJSONPB(*jsonpb.Marshaler) ([]byte, error) {
return json.Marshal(t.StringValue)
}
// MarshalJSONPB .
func (t *TagValue_Int64Value) MarshalJSONPB(*jsonpb.Marshaler) ([]byte, error) {
return json.Marshal(t.Int64Value)
}
// MarshalJSONPB .
func (t *TagValue_BoolValue) MarshalJSONPB(*jsonpb.Marshaler) ([]byte, error) {
return json.Marshal(t.BoolValue)
}
// MarshalJSONPB .
func (t *TagValue_FloatValue) MarshalJSONPB(*jsonpb.Marshaler) ([]byte, error) {
return json.Marshal(t.FloatValue)
}
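
These custom marshalers flatten the TagValue oneof, so a tags map serializes as plain key/value JSON instead of nested oneof wrapper objects. A rough sketch of the effect is below; TagValue_StringValue, TagValue_BoolValue, and the Value field are the oneof wrappers conventionally generated by gogo/protobuf and are assumed here, since api.pb.go is not shown in this diff.

```go
package main

import (
	"encoding/json"
	"fmt"

	v1 "go-common/app/service/main/dapper-query/api/v1"
)

func main() {
	tags := map[string]*v1.TagValue{
		"span.kind": {Value: &v1.TagValue_StringValue{StringValue: "server"}},
		"error":     {Value: &v1.TagValue_BoolValue{BoolValue: true}},
	}
	b, err := json.Marshal(tags)
	if err != nil {
		panic(err)
	}
	// With the MarshalJSON methods above, this prints flat values,
	// e.g. {"error":true,"span.kind":"server"}.
	fmt.Println(string(b))
}
```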


@@ -0,0 +1,42 @@
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_binary",
)
go_library(
name = "go_default_library",
srcs = ["main.go"],
data = ["dapper-query-example.toml"],
importpath = "go-common/app/service/main/dapper-query/cmd",
tags = ["automanaged"],
visibility = ["//visibility:private"],
deps = [
"//app/service/main/dapper-query/api/v1:go_default_library",
"//app/service/main/dapper-query/conf:go_default_library",
"//app/service/main/dapper-query/service:go_default_library",
"//app/service/main/dapper-query/util:go_default_library",
"//library/log:go_default_library",
"//library/net/http/blademaster:go_default_library",
],
)
go_binary(
name = "cmd",
embed = [":go_default_library"],
visibility = ["//visibility:public"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@@ -0,0 +1,19 @@
[log]
stdout = true
[bm]
addr = "127.0.0.1:6193"
timeout = "10s"
[hbase]
addrs = "nvm-test-dapper-influxdb-01"
[influx_db]
database = "dapper_uat"
addr = "http://172.22.33.146:8086"
[ops_log]
api = "http://uat-ops-log.bilibili.co/elasticsearch/_msearch"
[collectors]
nodes = ["172.16.38.143:6193"]


@@ -0,0 +1,52 @@
package main
import (
"flag"
"log"
"os"
"os/signal"
"syscall"
apiv1 "go-common/app/service/main/dapper-query/api/v1"
"go-common/app/service/main/dapper-query/conf"
"go-common/app/service/main/dapper-query/service"
"go-common/app/service/main/dapper-query/util"
xlog "go-common/library/log"
bm "go-common/library/net/http/blademaster"
)
func main() {
if !flag.Parsed() {
flag.Parse()
}
// load config file
if err := conf.Init(); err != nil {
log.Fatalf("init config error: %s", err)
}
// init xlog
xlog.Init(nil)
defer xlog.Close()
xlog.Info("dapper-query start")
// new dapper service
srv, err := service.New(conf.Conf)
if err != nil {
log.Fatalf("new dapper service error: %s", err)
}
// init blademaster server
engine := bm.NewServer(nil)
engine.Use(bm.Recovery(), bm.Logger())
engine.Ping(func(*bm.Context) {})
engine.Inject("^/x/internal/dapper/ops-log", util.SessionIDMiddleware)
apiv1.RegisterDapperQueryBMServer(engine, srv)
if err := engine.Start(); err != nil {
log.Fatalf("start bm server error: %s", err)
}
sch := make(chan os.Signal, 1)
signal.Notify(sch, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT)
s := <-sch
// program exit
xlog.Info("dapper-query got a signal %s", s.String())
xlog.Info("dapper-query exit")
}


@@ -0,0 +1,32 @@
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["config.go"],
importpath = "go-common/app/service/main/dapper-query/conf",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//library/conf:go_default_library",
"//library/log:go_default_library",
"//library/time:go_default_library",
"//vendor/github.com/BurntSushi/toml:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)


@@ -0,0 +1,108 @@
package conf
import (
"errors"
"flag"
"github.com/BurntSushi/toml"
"go-common/library/conf"
"go-common/library/log"
xtime "go-common/library/time"
)
func init() {
flag.StringVar(&confPath, "conf", "", "config file")
}
var (
confPath string
// Conf conf
Conf = &Config{}
client *conf.Client
)
// Config config.
type Config struct {
Log *log.Config `toml:"log"`
HBase *HBaseConfig `toml:"hbase"`
InfluxDB *InfluxDBConfig `toml:"influx_db"`
OpsLog *OpsLog `toml:"ops_log"`
Collectors *Collectors `toml:"collectors"`
}
// InfluxDBConfig InfluxDBConfig
type InfluxDBConfig struct {
Addr string `toml:"addr"`
Username string `toml:"username"`
Password string `toml:"password"`
Database string `toml:"database"`
}
// HBaseConfig hbase config
type HBaseConfig struct {
Namespace string `toml:"namespace"`
Addrs string `toml:"addrs"`
RPCQueueSize int `toml:"rpc_queue_size"`
FlushInterval xtime.Duration `toml:"flush_interval"`
EffectiveUser string `toml:"effective_user"`
RegionLookupTimeout xtime.Duration `toml:"region_lookup_timeout"`
RegionReadTimeout xtime.Duration `toml:"region_read_timeout"`
}
// OpsLog .
type OpsLog struct {
API string `toml:"api"`
}
// Collectors collector config
type Collectors struct {
Nodes []string `toml:"nodes"`
}
// Init config
func Init() (err error) {
if confPath != "" {
return local()
}
return remote()
}
func local() (err error) {
_, err = toml.DecodeFile(confPath, &Conf)
return
}
func remote() (err error) {
if client, err = conf.New(); err != nil {
return
}
if err = load(); err != nil {
return
}
go func() {
for range client.Event() {
log.Info("config reload")
if err := load(); err != nil {
log.Error("config reload error (%v)", err)
}
}
}()
return
}
func load() (err error) {
var (
s string
ok bool
tmpConf *Config
)
if s, ok = client.Toml2(); !ok {
return errors.New("load config center error")
}
if _, err = toml.Decode(s, &tmpConf); err != nil {
return errors.New("could not decode config")
}
*Conf = *tmpConf
return
}
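
As a quick illustration of how the TOML sections in cmd/dapper-query-example.toml map onto Config via the struct tags above, here is a sketch of the same decoding that local() performs; the relative file path is an assumption, and the pointer fields are only populated for sections present in the file.

```go
package main

import (
	"fmt"

	"github.com/BurntSushi/toml"

	"go-common/app/service/main/dapper-query/conf"
)

func main() {
	var c conf.Config
	// [influx_db] -> Config.InfluxDB, [hbase] -> Config.HBase, [ops_log] -> Config.OpsLog, etc.
	if _, err := toml.DecodeFile("app/service/main/dapper-query/cmd/dapper-query-example.toml", &c); err != nil {
		panic(err)
	}
	fmt.Println(c.InfluxDB.Addr, c.Collectors.Nodes)
}
```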


@@ -0,0 +1,53 @@
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
"go_test",
)
go_library(
name = "go_default_library",
srcs = [
"dao.go",
"util.go",
],
importpath = "go-common/app/service/main/dapper-query/dao",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/dapper-query/conf:go_default_library",
"//app/service/main/dapper-query/model:go_default_library",
"//library/log:go_default_library",
"//vendor/github.com/dgryski/go-farm:go_default_library",
"//vendor/github.com/influxdata/influxdb/client/v2:go_default_library",
"//vendor/github.com/tsuna/gohbase:go_default_library",
"//vendor/github.com/tsuna/gohbase/filter:go_default_library",
"//vendor/github.com/tsuna/gohbase/hrpc:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
go_test(
name = "go_default_test",
srcs = ["dao_test.go"],
embed = [":go_default_library"],
rundir = ".",
tags = ["automanaged"],
deps = [
"//app/service/main/dapper-query/conf:go_default_library",
"//vendor/github.com/smartystreets/goconvey/convey:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
],
)


@@ -0,0 +1,506 @@
package dao
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"math"
"sort"
"strconv"
"strings"
influxdb "github.com/influxdata/influxdb/client/v2"
"github.com/tsuna/gohbase"
"github.com/tsuna/gohbase/filter"
"github.com/tsuna/gohbase/hrpc"
"go-common/app/service/main/dapper-query/conf"
"go-common/app/service/main/dapper-query/model"
"go-common/library/log"
)
// Order
const (
TimeDesc = "time:desc"
TimeAsc = "time:asc"
DurationDesc = "duration:desc"
DurationAsc = "duration:asc"
)
// ErrNotFound data not found
var ErrNotFound = errors.New("not found")
// Selector to selector span
type Selector struct {
Start int64
End int64
Limit int
Offset int
OnlyError bool
}
// HBase table/family names and InfluxDB measurement, tag, and field names
const (
DefaultHbaseNameSpace = "ugc"
DefaultInfluxDatabase = "dapper"
HbaseRawTraceTable = "DapperRawtrace"
HbaseRawTraceFamily = "pb"
HbaseListIdxTable = "DapperListidx"
HbaseListIdxFamily = "kind"
ServiceNameTag = "service_name"
OperationNameTag = "operation_name"
PeerServiceTag = "peer.service"
SpanKindTag = "span.kind"
MaxDurationField = "max_duration"
MinDurationField = "min_duration"
AvgDurationField = "avg_duration"
SpanpointMeasurement = "span_point"
ErrorsField = "errors"
)
// Dao dapper dao
type Dao interface {
// list all ServiceNames
ServiceNames(ctx context.Context) ([]string, error)
// list OperationNames for a specific service
OperationNames(ctx context.Context, serviceName string) ([]string, error)
// QuerySpanList queries span references by service name, operation name, selector and order
QuerySpanList(ctx context.Context, serviceName, operationName string, sel *Selector, order string) ([]model.SpanListRef, error)
// Trace gets a trace by trace ID; spans are sorted by start_time
Trace(ctx context.Context, traceID uint64, spanIDs ...uint64) ([]*model.Span, error)
// PeerService queries all peer services the given service depends on
PeerService(ctx context.Context, serviceName string) ([]string, error)
// Ping pong
Ping(ctx context.Context) error
// Close dao
Close(ctx context.Context) error
// SupportOrder checks whether the given order is supported
SupportOrder(order string) bool
// MeanOperationNameField
MeanOperationNameField(ctx context.Context, whereMap map[string]string, field string, start, end int64, groupby []string) ([]model.MeanOperationNameValue, error)
// SpanSeriesMean
SpanSeriesMean(ctx context.Context, serviceName, operationName string, fields []string, start, end, interval int64) (*model.Series, error)
// SpanSeriesCount
SpanSeriesCount(ctx context.Context, serviceName, operationName string, fields []string, start, end, interval int64) (*model.Series, error)
}
type dao struct {
hbaseNameSpace string
hbaseClient gohbase.Client
influxDatabase string
influxdbClient influxdb.Client
}
func (d *dao) Ping(ctx context.Context) error {
return nil
}
func (d *dao) Close(ctx context.Context) error {
d.hbaseClient.Close()
return d.influxdbClient.Close()
}
// New dao
func New(cfg *conf.Config) (Dao, error) {
// disable rpc queue
hbaseClient := gohbase.NewClient(cfg.HBase.Addrs, gohbase.RpcQueueSize(0))
hbaseNameSpace := DefaultHbaseNameSpace
if cfg.HBase.Namespace != "" {
hbaseNameSpace = cfg.HBase.Namespace
}
influxdbCfg := influxdb.HTTPConfig{Addr: cfg.InfluxDB.Addr, Username: cfg.InfluxDB.Username, Password: cfg.InfluxDB.Password}
influxdbClient, err := influxdb.NewHTTPClient(influxdbCfg)
if err != nil {
return nil, err
}
influxDatabase := DefaultInfluxDatabase
if cfg.InfluxDB.Database != "" {
influxDatabase = cfg.InfluxDB.Database
}
return &dao{
hbaseNameSpace: hbaseNameSpace,
hbaseClient: hbaseClient,
influxDatabase: influxDatabase,
influxdbClient: influxdbClient,
}, nil
}
func (d *dao) ServiceNames(ctx context.Context) ([]string, error) {
where := fmt.Sprintf("%s = '%s'", SpanKindTag, "server")
return d.showTagValues(ctx, ServiceNameTag, where)
}
func (d *dao) showTagValues(ctx context.Context, tag, where string) ([]string, error) {
command := fmt.Sprintf(`SHOW TAG VALUES FROM "%s" WITH KEY = "%s" WHERE %s`,
SpanpointMeasurement, tag, where)
log.V(10).Info("query command %s", command)
query := influxdb.NewQuery(command, d.influxDatabase, "1s")
resp, err := d.influxdbClient.Query(query)
if err != nil {
return nil, err
}
if len(resp.Results) == 0 || len(resp.Results[0].Series) == 0 {
return make([]string, 0), nil
}
rows := resp.Results[0].Series[0]
values := make([]string, 0, len(rows.Values))
for _, kv := range rows.Values {
if len(kv) != 2 {
continue
}
if value, ok := kv[1].(string); ok {
values = append(values, value)
}
}
return values, nil
}
func (d *dao) OperationNames(ctx context.Context, serviceName string) ([]string, error) {
where := fmt.Sprintf("%s = '%s' AND %s = '%s'", ServiceNameTag, serviceName, SpanKindTag, "server")
return d.showTagValues(ctx, OperationNameTag, where)
}
func (d *dao) QuerySpanList(ctx context.Context, serviceName string, operationName string, sel *Selector, order string) ([]model.SpanListRef, error) {
log.V(10).Info("query span list serviceName: %s, operationName: %s, sel: %+v order: %s", serviceName, operationName, sel, order)
prefix := keyPrefix(serviceName, operationName)
startKey, stopKey := rangeKey(prefix, sel.Start, sel.End)
switch order {
case TimeAsc, TimeDesc:
return d.querySpanListTimeOrder(ctx, startKey, stopKey, prefix, sel.Limit, sel.Offset, order == TimeDesc, sel.OnlyError)
case DurationDesc, DurationAsc:
return d.querySpanListDurationOrder(ctx, startKey, stopKey, prefix, sel.Limit, sel.Offset, order == DurationDesc, sel.OnlyError)
}
return nil, fmt.Errorf("unsupport order")
}
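// parseSpanListRef decodes a single index cell. Judging from the parsing
// below, the cell value is "<traceID hex>:<spanID hex>" and the qualifier is
// "<kind>:<duration>", where kind "e" marks an error span and duration is a
// decimal integer used only for ordering (editor's reading of the code, not a
// documented format).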
func parseSpanListRef(cell *hrpc.Cell) (spanListRef model.SpanListRef, err error) {
value := cell.Value
ref := bytes.SplitN(value, []byte(":"), 2)
if len(ref) != 2 {
err = fmt.Errorf("invalid ref %s", value)
return
}
if spanListRef.TraceID, err = strconv.ParseUint(string(ref[0]), 16, 64); err != nil {
return
}
spanListRef.SpanID, err = strconv.ParseUint(string(ref[1]), 16, 64)
if err != nil {
return
}
kd := bytes.SplitN(cell.Qualifier, []byte(":"), 2)
if len(kd) != 2 {
err = fmt.Errorf("invalid qualifier %s", cell.Qualifier)
return
}
if bytes.Equal(kd[0], []byte("e")) {
spanListRef.IsError = true
}
spanListRef.Duration, err = strconv.ParseInt(string(kd[1]), 10, 64)
return
}
type minHeapifyListRef []model.SpanListRef
func (m minHeapifyListRef) push(listRef model.SpanListRef) {
if m[0].Duration > listRef.Duration {
return
}
m[0] = listRef
m.minHeapify(0)
}
func (m minHeapifyListRef) minHeapify(i int) {
var lowest int
left := (i+1)*2 - 1
right := (i + 1) * 2
if left < len(m) && m[left].Duration < m[i].Duration {
lowest = left
} else {
lowest = i
}
if right < len(m) && m[right].Duration < m[lowest].Duration {
lowest = right
}
if lowest != i {
m[i], m[lowest] = m[lowest], m[i]
m.minHeapify(lowest)
}
}
func (m minHeapifyListRef) Len() int {
return len(m)
}
func (m minHeapifyListRef) Less(i, j int) bool {
return m[i].Duration > m[j].Duration
}
func (m minHeapifyListRef) Swap(i, j int) {
m[i], m[j] = m[j], m[i]
}
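// Editor's sketch (not part of the original file): how the fixed-size helper
// above is meant to be used. A zero-valued slice acts as a min-heap of the K
// largest durations seen so far; push overwrites the current minimum m[0]
// whenever a duration at least as large arrives, and the final sort.Sort
// orders the survivors by descending duration. Assumes k > 0.
func exampleTopKByDuration(refs []model.SpanListRef, k int) []model.SpanListRef {
top := make(minHeapifyListRef, k)
for _, ref := range refs {
top.push(ref)
}
sort.Sort(top)
return top
}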
func (d *dao) querySpanListDurationOrder(ctx context.Context, startKey, stopKey, prefix string, limit, offset int, reverse bool, onlyError bool) ([]model.SpanListRef, error) {
var options []func(hrpc.Call) error
options = append(options, hrpc.Filters(filter.NewPrefixFilter([]byte(prefix))))
if reverse {
startKey, stopKey = stopKey, startKey
options = append(options, hrpc.Reversed())
}
table := d.hbaseNameSpace + ":" + HbaseListIdxTable
scan, err := hrpc.NewScanRangeStr(ctx, table, startKey, stopKey, options...)
if err != nil {
return nil, err
}
scanner := d.hbaseClient.Scan(scan)
defer scanner.Close()
spanListRefs := make(minHeapifyListRef, limit+offset)
for {
result, err := scanner.Next()
if err != nil {
if err != io.EOF {
return nil, err
}
break
}
if len(result.Cells) > 0 {
log.V(10).Info("scan rowkey %s", result.Cells[0].Row)
}
for _, cell := range result.Cells {
if string(cell.Family) != HbaseListIdxFamily {
continue
}
spanListRef, err := parseSpanListRef(cell)
if err != nil {
// ignored error?
return nil, err
}
if onlyError && !spanListRef.IsError {
continue
}
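// editor's note: for ascending order (reverse == false) the duration is
// inverted (MaxInt64 - d) so the smallest real durations look largest to the
// heap and survive; the stored value is only used for ordering.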
if !reverse {
spanListRef.Duration = math.MaxInt64 - spanListRef.Duration
}
spanListRefs.push(spanListRef)
}
}
sort.Sort(spanListRefs)
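// unfilled heap slots keep zero TraceID and zero duration, so after sorting
// they sit at the tail; trim them before applying the offset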
for i := range spanListRefs[offset:] {
if spanListRefs[offset+i].TraceID == 0 {
spanListRefs = spanListRefs[:offset+i]
break
}
}
return spanListRefs[offset:], nil
}
func (d *dao) querySpanListTimeOrder(ctx context.Context, startKey, stopKey, prefix string, limit, offset int, reverse bool, onlyError bool) ([]model.SpanListRef, error) {
var options []func(hrpc.Call) error
options = append(options, hrpc.Filters(filter.NewPrefixFilter([]byte(prefix))))
if reverse {
startKey, stopKey = stopKey, startKey
options = append(options, hrpc.Reversed())
}
table := d.hbaseNameSpace + ":" + HbaseListIdxTable
scan, err := hrpc.NewScanRangeStr(ctx, table, startKey, stopKey, options...)
if err != nil {
return nil, err
}
scanner := d.hbaseClient.Scan(scan)
defer scanner.Close()
spanListRefs := make([]model.SpanListRef, 0, limit)
for {
result, err := scanner.Next()
if err != nil {
if err != io.EOF {
return nil, err
}
break
}
if len(result.Cells) > 0 {
log.V(10).Info("scan rowkey %s", result.Cells[0].Row)
}
for _, cell := range result.Cells {
if string(cell.Family) != HbaseListIdxFamily {
continue
}
spanListRef, err := parseSpanListRef(cell)
if err != nil {
// ignored error?
return nil, err
}
if onlyError && !spanListRef.IsError {
continue
}
if offset > 0 {
offset--
continue
}
if limit <= 0 {
break
}
spanListRefs = append(spanListRefs, spanListRef)
limit--
}
}
return spanListRefs, nil
}
func (d *dao) Trace(ctx context.Context, traceID uint64, spanIDs ...uint64) ([]*model.Span, error) {
table := d.hbaseNameSpace + ":" + HbaseRawTraceTable
traceIDStr := strconv.FormatUint(traceID, 16)
var options []func(hrpc.Call) error
if len(spanIDs) != 0 {
filters := make([]filter.Filter, 0, len(spanIDs))
for _, spanID := range spanIDs {
spanIDStr := strconv.FormatUint(spanID, 16)
filters = append(filters, filter.NewColumnPrefixFilter([]byte(spanIDStr)))
}
options = append(options, hrpc.Filters(filter.NewList(filter.MustPassOne, filters...)))
}
get, err := hrpc.NewGetStr(ctx, table, traceIDStr, options...)
if err != nil {
return nil, err
}
result, err := d.hbaseClient.Get(get)
if err != nil {
return nil, err
}
spans := make([]*model.Span, 0, len(result.Cells))
for _, cell := range result.Cells {
if string(cell.Family) != HbaseRawTraceFamily {
continue
}
span, err := model.FromProtoSpan(cell.Value)
if err != nil {
// TODO: ignore error?
log.Error("unmarshal protobuf span data rowkey: %x, cf: %s:%s error: %s", traceID, cell.Family, cell.Qualifier, err)
continue
}
spans = append(spans, span)
}
sort.Slice(spans, func(i, j int) bool {
return spans[i].StartTime.UnixNano() > spans[j].StartTime.UnixNano()
})
return spans, nil
}
func (d *dao) SupportOrder(order string) bool {
switch order {
case TimeAsc, TimeDesc, DurationAsc, DurationDesc:
return true
}
return false
}
func (d *dao) MeanOperationNameField(ctx context.Context, whereMap map[string]string, field string, start, end int64, groupby []string) ([]model.MeanOperationNameValue, error) {
var wheres []string
for k, v := range whereMap {
wheres = append(wheres, fmt.Sprintf(`%s='%s'`, k, v))
}
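// editor's note: with whereMap {service_name: "foo"}, field "max_duration" and
// groupby {operation_name}, the generated statement looks like:
//   SELECT mean("max_duration") AS mean_max_duration FROM "span_point"
//   WHERE service_name='foo' AND time > <start>s AND time < <end>s
//   GROUP BY operation_name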
command := fmt.Sprintf(`SELECT mean("%s") AS mean_%s FROM "%s" WHERE %s AND time > %ds AND time < %ds GROUP BY %s`,
field,
field,
SpanpointMeasurement,
strings.Join(wheres, " AND "),
start, end,
strings.Join(groupby, ", "),
)
log.V(10).Info("query command %s", command)
query := influxdb.NewQuery(command, d.influxDatabase, "1s")
resp, err := d.influxdbClient.Query(query)
if err != nil {
return nil, err
}
if len(resp.Results) == 0 || len(resp.Results[0].Series) == 0 {
return make([]model.MeanOperationNameValue, 0), nil
}
values := make([]model.MeanOperationNameValue, 0, len(resp.Results[0].Series))
for _, row := range resp.Results[0].Series {
value := model.MeanOperationNameValue{Tag: row.Tags}
if len(row.Values) == 0 || len(row.Values[0]) < 2 {
continue
}
valStr, ok := row.Values[0][1].(json.Number)
if !ok {
continue
}
// trust influxdb not to return malformed numbers here
value.Value, _ = valStr.Float64()
values = append(values, value)
}
return values, nil
}
func (d *dao) SpanSeriesMean(ctx context.Context, serviceName, operationName string, fields []string, start, end, interval int64) (*model.Series, error) {
return d.spanSeries(ctx, serviceName, operationName, "mean", fields, start, end, interval)
}
func (d *dao) SpanSeriesCount(ctx context.Context, serviceName, operationName string, fields []string, start, end, interval int64) (*model.Series, error) {
return d.spanSeries(ctx, serviceName, operationName, "count", fields, start, end, interval)
}
func (d *dao) spanSeries(ctx context.Context, serviceName, operationName, fn string, fields []string, start, end, interval int64) (*model.Series, error) {
var selects []string
for _, field := range fields {
selects = append(selects, fmt.Sprintf(`%s("%s") AS "%s_%s"`, fn, field, fn, field))
}
command := fmt.Sprintf(`SELECT %s FROM %s WHERE "%s"='%s' AND "%s"='%s' AND time > %ds AND time < %ds GROUP BY time(%ds) FILL(null)`,
strings.Join(selects, ", "),
SpanpointMeasurement,
ServiceNameTag, serviceName,
OperationNameTag, operationName,
start, end, interval)
log.V(10).Info("query command %s", command)
query := influxdb.NewQuery(command, d.influxDatabase, "1s")
resp, err := d.influxdbClient.Query(query)
if err != nil {
return nil, err
}
if len(resp.Results) == 0 || len(resp.Results[0].Series) == 0 {
return new(model.Series), nil
}
series := new(model.Series)
fieldMap := make(map[int]*model.SeriesItem)
for _, row := range resp.Results[0].Series {
// the first column is time
for i, name := range row.Columns {
if name == "time" {
continue
}
fieldMap[i] = &model.SeriesItem{Field: name}
}
for _, value := range row.Values {
for i, val := range value {
if i == 0 {
timestamp, _ := val.(json.Number).Int64()
series.Timestamps = append(series.Timestamps, timestamp)
continue
}
n, ok := val.(json.Number)
if !ok {
// FILL(null) yields nil values; record the gap and keep column alignment
fieldMap[i].Rows = append(fieldMap[i].Rows, nil)
continue
}
v, _ := n.Float64()
fieldMap[i].Rows = append(fieldMap[i].Rows, &v)
}
}
}
for _, v := range fieldMap {
series.Items = append(series.Items, v)
}
return series, nil
}
func (d *dao) PeerService(ctx context.Context, serviceName string) ([]string, error) {
where := fmt.Sprintf("%s = '%s' AND %s = '%s'", ServiceNameTag, serviceName, SpanKindTag, "client")
return d.showTagValues(ctx, PeerServiceTag, where)
}

View File

@@ -0,0 +1,225 @@
package dao
import (
"context"
"flag"
"fmt"
"log"
"os"
"strconv"
"testing"
"time"
. "github.com/smartystreets/goconvey/convey"
"go-common/app/service/main/dapper-query/conf"
"golang.org/x/sys/unix"
)
var cfg *conf.Config
var flagMap = map[string]string{
"app_id": "main.common-arch.dapper-query",
"conf_appid": "main.common-arch.dapper-query",
"conf_token": "ed3241c850735df94d24d7b49f69ddd7",
"tree_id": "60617",
"conf_version": "docker-1",
"deploy_env": "uat",
"conf_env": "uat",
"conf_host": "config.bilibili.co",
"conf_path": os.TempDir(),
"region": "sh",
"zone": "sh001",
}
// only for ut runner
func hackHosts() {
hostsPath := "/etc/hosts"
if unix.Access(hostsPath, unix.W_OK) != nil {
return
}
fp, err := os.OpenFile(hostsPath, os.O_WRONLY, 0644)
if err != nil {
log.Printf("open hosts file error: %s", err)
}
defer fp.Close()
fmt.Fprintf(fp, "\n")
fmt.Fprintln(fp, "172.22.33.146 nvm-test-dapper-influxdb-01")
}
func TestMain(m *testing.M) {
hackHosts()
for key, val := range flagMap {
flag.Set(key, val)
}
flag.Parse()
if err := conf.Init(); err != nil {
log.Printf("init config from remote error: %s", err)
}
if hbaseAddrs := os.Getenv("TEST_HBASE_ADDRS"); hbaseAddrs != "" {
cfg = new(conf.Config)
cfg.HBase = &conf.HBaseConfig{Addrs: hbaseAddrs, Namespace: "ugc"}
if influxdbAddr := os.Getenv("TEST_INFLUXDB_ADDR"); influxdbAddr != "" {
cfg.InfluxDB = &conf.InfluxDBConfig{Addr: influxdbAddr, Database: "dapper_uat"}
}
}
if cfg == nil {
cfg = conf.Conf
if cfg.InfluxDB != nil {
cfg.InfluxDB.Database = "dapper_uat"
}
}
os.Exit(m.Run())
}
func TestDao(t *testing.T) {
if cfg == nil {
t.Skipf("no config provide skipped")
}
daoImpl, err := New(cfg)
if err != nil {
t.Fatalf("new dao error: %s", err)
}
ctx := context.Background()
serviceName := "main.community.tag"
operationName := "/x/internal/tag/archive/tags"
Convey("query serviceNames", t, func() {
serviceNames, err := daoImpl.ServiceNames(ctx)
So(err, ShouldBeNil)
So(serviceNames, ShouldNotBeEmpty)
t.Logf("serviceNames: %v", serviceNames)
Convey("query operationNames", func() {
// FIXME: make mock data first
operationNames, err := daoImpl.OperationNames(ctx, serviceName)
So(err, ShouldBeNil)
So(operationNames, ShouldNotBeEmpty)
t.Logf("operationNames for %s :%v", serviceName, operationNames)
})
})
Convey("test QuerySpanListTime Asc", t, func() {
// FIXME: make mock data first
spanListRefs, err := daoImpl.QuerySpanList(ctx, serviceName, operationName, &Selector{
Start: time.Now().Unix() - 3600,
End: time.Now().Unix(),
Limit: 10,
Offset: 10,
}, TimeAsc)
So(err, ShouldBeNil)
So(spanListRefs, ShouldNotBeEmpty)
t.Logf("spanListRefs: %v", spanListRefs)
})
Convey("test QuerySpanListTime Desc", t, func() {
// FIXME: make mock data first
spanListRefs, err := daoImpl.QuerySpanList(ctx, serviceName, operationName, &Selector{
Start: time.Now().Unix() - 3600*12,
End: time.Now().Unix(),
Limit: 10,
Offset: 10,
}, TimeDesc)
So(err, ShouldBeNil)
So(spanListRefs, ShouldNotBeEmpty)
t.Logf("spanListRefs: %v", spanListRefs)
Convey("test get trace", func() {
spanListRef := spanListRefs[0]
spans, err := daoImpl.Trace(ctx, spanListRef.TraceID)
So(err, ShouldBeNil)
So(spans, ShouldNotBeEmpty)
t.Logf("spans %v", spans)
})
})
Convey("test QuerySpanListDuration Desc", t, func() {
// FIXME: make mock data first
spanListRefs, err := daoImpl.QuerySpanList(ctx, serviceName, operationName, &Selector{
Start: time.Now().Unix() - 3600*12,
End: time.Now().Unix(),
Limit: 10,
Offset: 10,
}, DurationDesc)
So(err, ShouldBeNil)
So(spanListRefs, ShouldNotBeEmpty)
t.Logf("spanListRefs: %v", spanListRefs)
Convey("test get trace", func() {
spanListRef := spanListRefs[len(spanListRefs)-1]
spans, err := daoImpl.Trace(ctx, spanListRef.TraceID)
So(err, ShouldBeNil)
So(spans, ShouldNotBeEmpty)
t.Logf("spans %v", spans)
})
})
Convey("test QuerySpanListDuration Asc", t, func() {
// FIXME: make mock data first
spanListRefs, err := daoImpl.QuerySpanList(ctx, serviceName, operationName, &Selector{
Start: time.Now().Unix() - 3600*12,
End: time.Now().Unix(),
Limit: 10,
Offset: 10,
}, DurationAsc)
So(err, ShouldBeNil)
So(spanListRefs, ShouldNotBeEmpty)
t.Logf("spanListRefs: %v", spanListRefs)
Convey("test get trace", func() {
spanListRef := spanListRefs[len(spanListRefs)-1]
spans, err := daoImpl.Trace(ctx, spanListRef.TraceID)
So(err, ShouldBeNil)
So(spans, ShouldNotBeEmpty)
t.Logf("spans %v", spans)
})
})
Convey("test MeanOperationNameField", t, func() {
start := time.Now().Unix() - 3600
end := time.Now().Unix()
values, err := daoImpl.MeanOperationNameField(ctx, map[string]string{"service_name": serviceName}, "max_duration", start, end, []string{"operation_name"})
if err != nil {
t.Error(err)
}
So(values, ShouldNotBeEmpty)
})
Convey("test SpanSeriesMean", t, func() {
start := time.Now().Unix() - 3600
end := time.Now().Unix()
series, err := daoImpl.SpanSeriesMean(ctx, serviceName, operationName, []string{"max_duration", "min_duration"}, start, end, 30)
if err != nil {
t.Error(err)
}
So(series.Timestamps, ShouldNotBeEmpty)
So(series.Items, ShouldNotBeEmpty)
// FIXME
//for _, item := range series.Items {
// So(len(series.Timestamps), ShouldEqual, len(item.Rows))
//}
t.Logf("%#v\n", series)
})
Convey("test SpanSeriesCount", t, func() {
start := time.Now().Unix() - 3600
end := time.Now().Unix()
series, err := daoImpl.SpanSeriesCount(ctx, serviceName, operationName, []string{"max_duration", "min_duration"}, start, end, 30)
if err != nil {
t.Error(err)
}
So(series.Timestamps, ShouldNotBeEmpty)
So(series.Items, ShouldNotBeEmpty)
// FIXME
//for _, item := range series.Items {
// So(len(series.Timestamps), ShouldEqual, len(item.Rows))
//}
t.Logf("%#v\n", series)
})
Convey("test PeerService", t, func() {
serviceName := "main.bangumi.season-service"
peerServices, err := daoImpl.PeerService(ctx, serviceName)
if err != nil {
t.Error(err)
}
So(peerServices, ShouldNotBeEmpty)
})
Convey("test trace", t, func() {
traceID, _ := strconv.ParseUint("100056da7886666c", 16, 64)
spans, err := daoImpl.Trace(ctx, traceID)
if err != nil {
t.Error(err)
}
So(spans, ShouldNotBeEmpty)
})
Convey("test ping close", t, func() {
So(daoImpl.Ping(ctx), ShouldBeNil)
So(daoImpl.Close(ctx), ShouldBeNil)
})
}

View File

@@ -0,0 +1,4 @@
#!/bin/bash
export TEST_INFLUXDB_ADDR=http://172.22.33.146:8086
export TEST_HBASE_ADDRS=172.22.33.146

View File

@@ -0,0 +1,18 @@
package dao
import (
"fmt"
"strconv"
"github.com/dgryski/go-farm"
)
func rangeKey(prefix string, start, end int64) (string, string) {
return prefix + strconv.FormatInt(start, 10), prefix + strconv.FormatInt(end, 10)
}
func keyPrefix(serviceName, operationName string) string {
serviceNameHash := farm.Hash32([]byte(serviceName))
operationNameHash := farm.Hash32([]byte(operationName))
return fmt.Sprintf("%x%x", serviceNameHash, operationNameHash)
}
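// Editor's sketch (not part of the original file): how the two helpers above
// combine into an HBase scan range for one (service, operation) pair; the
// hex-encoded farm hashes keep every row of the pair under one key prefix and
// the appended unix timestamps bound the scan in time.
func exampleScanRange(serviceName, operationName string, now int64) (string, string) {
prefix := keyPrefix(serviceName, operationName)
return rangeKey(prefix, now-3600, now)
}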

View File

@@ -0,0 +1,42 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = [
"const.go",
"rank.go",
"series.go",
"span.go",
"spanpoint.go",
"spanref.go",
"util.go",
],
importpath = "go-common/app/service/main/dapper-query/model",
tags = ["manual"],
visibility = ["//visibility:public"],
deps = [
"//library/net/trace/proto:go_default_library",
"@com_github_golang_protobuf//proto:go_default_library",
"@io_bazel_rules_go//proto/wkt:duration_go_proto",
"@io_bazel_rules_go//proto/wkt:timestamp_go_proto",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,18 @@
package model
// RankType
const (
MaxDurationRank = "max_duration"
MinDurationRank = "min_duration"
AvgDurationRank = "avg_duration"
ErrorsRank = "errors"
)
// VerifyRankType .
func VerifyRankType(rankType string) bool {
switch rankType {
case MaxDurationRank, MinDurationRank, AvgDurationRank, ErrorsRank:
return true
}
return false
}

View File

@@ -0,0 +1,18 @@
package model
import (
"sort"
)
// MeanOperationNameValue .
type MeanOperationNameValue struct {
Tag map[string]string
Value float64
}
// SortRank sorts values by Value in descending order
func SortRank(values []MeanOperationNameValue) {
sort.Slice(values, func(i, j int) bool {
return values[i].Value > values[j].Value
})
}

View File

@@ -0,0 +1,13 @@
package model
// SeriesItem .
type SeriesItem struct {
Field string
Rows []*float64
}
// Series data
type Series struct {
Timestamps []int64
Items []*SeriesItem
}

View File

@@ -0,0 +1,97 @@
package model
import (
"strconv"
"time"
)
// RefType Kind
const (
RefTypeChildOf int8 = iota
RefTypeFollowsFrom
)
// TagKind
const (
TagString int8 = iota
TagInt
TagBool
TagFloat
)
// SpanRef describes causal relationship of the current span to another span (e.g. 'child-of')
type SpanRef struct {
RefType int8
TraceID uint64
SpanID uint64
}
// Tag span tag
type Tag struct {
Kind int8
Key string
Value interface{}
}
// Field log field
type Field struct {
Key string
Value []byte
}
// Log span log
type Log struct {
Timestamp int64
Fields []Field
}
// Span represents a named unit of work performed by a service.
type Span struct {
ServiceName string
OperationName string
TraceID uint64
SpanID uint64
ParentID uint64
Env string
StartTime time.Time
Duration time.Duration
References []SpanRef
Tags map[string]interface{}
Logs []Log
}
// TraceIDStr return hex format trace_id
func (s *Span) TraceIDStr() string {
return strconv.FormatUint(s.TraceID, 16)
}
// SpanIDStr return hex format span_id
func (s *Span) SpanIDStr() string {
return strconv.FormatUint(s.SpanID, 16)
}
// ParentIDStr return hex format parent_id
func (s *Span) ParentIDStr() string {
return strconv.FormatUint(s.ParentID, 16)
}
// IsServer span kind is server
func (s *Span) IsServer() bool {
kind, ok := s.Tags["span.kind"].(string)
if !ok {
return false
}
return kind == "server"
}
// IsError reports whether an error happened
func (s *Span) IsError() bool {
isErr, _ := s.Tags["error"].(bool)
return isErr
}
// StringTag get string value from tag
func (s *Span) StringTag(key string) string {
val, _ := s.Tags[key].(string)
return val
}
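// Editor's sketch (not part of the original file): typical read-side use of
// the helpers above when classifying a span for display.
func exampleClassify(s *Span) string {
if s.IsError() {
return "error"
}
if s.IsServer() {
return "server " + s.StringTag("component")
}
return "client " + s.StringTag("peer.service")
}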

View File

@@ -0,0 +1,24 @@
package model
// const for SpanPoint
const ()
// SamplePoint SamplePoint
type SamplePoint struct {
TraceID uint64
SpanID uint64
Value int64
}
// SpanPoint contains time series
type SpanPoint struct {
Timestamp int64
ServiceName string
OperationName string
PeerService string
SpanKind string
AvgDuration SamplePoint // random sample point
MaxDuration SamplePoint
MinDuration SamplePoint
Errors []SamplePoint
}

View File

@@ -0,0 +1,23 @@
package model
import (
"strconv"
)
// SpanListRef .
type SpanListRef struct {
TraceID uint64
SpanID uint64
IsError bool
Duration int64
}
// TraceIDStr hex format traceid
func (s SpanListRef) TraceIDStr() string {
return strconv.FormatUint(s.TraceID, 16)
}
// SpanIDStr hex format spanid
func (s SpanListRef) SpanIDStr() string {
return strconv.FormatUint(s.SpanID, 16)
}

View File

@@ -0,0 +1,175 @@
package model
import (
"encoding/binary"
"math"
"strconv"
"time"
"github.com/golang/protobuf/proto"
"github.com/golang/protobuf/ptypes/duration"
"github.com/golang/protobuf/ptypes/timestamp"
protogen "go-common/library/net/trace/proto"
)
const protoVersion2 int32 = 2
// FromProtoSpan unmarshals serialized protogen.Span data into a model.Span
func FromProtoSpan(data []byte) (*Span, error) {
protoSpan := new(protogen.Span)
if err := proto.Unmarshal(data, protoSpan); err != nil {
return nil, err
}
var span *Span
var err error
if protoSpan.Version != protoVersion2 {
span, err = fromProtoSpanLeagcy(protoSpan)
} else {
span, err = fromProtoSpanInternal(protoSpan)
}
return span, err
}
func convertLeagcyTag(protoTag *protogen.Tag) Tag {
tag := Tag{Key: protoTag.Key}
switch protoTag.Kind {
case protogen.Tag_STRING:
tag.Kind = TagString
tag.Value = string(protoTag.Value)
case protogen.Tag_INT:
tag.Kind = TagInt
tag.Value, _ = strconv.ParseInt(string(protoTag.Value), 10, 64)
case protogen.Tag_BOOL:
tag.Kind = TagBool
tag.Value, _ = strconv.ParseBool(string(protoTag.Value))
case protogen.Tag_FLOAT:
tag.Kind = TagFloat
tag.Value, _ = strconv.ParseFloat(string(protoTag.Value), 64)
}
return tag
}
func convertLeagcyLog(protoLog *protogen.Log) Log {
log := Log{Timestamp: protoLog.Timestamp}
log.Fields = []Field{{Key: protoLog.Key, Value: protoLog.Value}}
return log
}
func fromProtoSpanLeagcy(protoSpan *protogen.Span) (*Span, error) {
span := &Span{
ServiceName: protoSpan.ServiceName,
OperationName: protoSpan.OperationName,
TraceID: protoSpan.TraceId,
SpanID: protoSpan.SpanId,
ParentID: protoSpan.ParentId,
Env: protoSpan.Env,
}
span.StartTime = time.Unix(protoSpan.StartAt/int64(time.Second), protoSpan.StartAt%int64(time.Second))
span.Duration = time.Duration(protoSpan.FinishAt - protoSpan.StartAt)
span.References = []SpanRef{{
RefType: RefTypeChildOf,
TraceID: protoSpan.TraceId,
SpanID: protoSpan.ParentId,
}}
span.Tags = make(map[string]interface{})
for _, tag := range protoSpan.Tags {
newTag := convertLeagcyTag(tag)
span.Tags[newTag.Key] = newTag.Value
}
span.Logs = make([]Log, 0, len(protoSpan.Logs))
for _, log := range protoSpan.Logs {
span.Logs = append(span.Logs, convertLeagcyLog(log))
}
return span, nil
}
func timeFromTimestamp(t *timestamp.Timestamp) time.Time {
return time.Unix(t.Seconds, int64(t.Nanos))
}
func durationFromDuration(d *duration.Duration) time.Duration {
return time.Duration(d.Seconds*int64(time.Second) + int64(d.Nanos))
}
func convertSpanRef(protoRef *protogen.SpanRef) SpanRef {
ref := SpanRef{
TraceID: protoRef.TraceId,
SpanID: protoRef.SpanId,
}
switch protoRef.RefType {
case protogen.SpanRef_CHILD_OF:
ref.RefType = RefTypeChildOf
case protogen.SpanRef_FOLLOWS_FROM:
ref.RefType = RefTypeFollowsFrom
}
return ref
}
func unSerializeInt64(data []byte) int64 {
return int64(binary.BigEndian.Uint64(data))
}
func unSerializeBool(data []byte) bool {
return data[0] == byte(1)
}
func unSerializeFloat64(data []byte) float64 {
value := binary.BigEndian.Uint64(data)
return math.Float64frombits(value)
}
func convertTag(protoTag *protogen.Tag) Tag {
tag := Tag{Key: protoTag.Key}
switch protoTag.Kind {
case protogen.Tag_STRING:
tag.Kind = TagString
tag.Value = string(protoTag.Value)
case protogen.Tag_INT:
tag.Kind = TagInt
tag.Value = unSerializeInt64(protoTag.Value)
case protogen.Tag_BOOL:
tag.Kind = TagBool
tag.Value = unSerializeBool(protoTag.Value)
case protogen.Tag_FLOAT:
tag.Kind = TagFloat
tag.Value = unSerializeFloat64(protoTag.Value)
}
return tag
}
func convertLog(protoLog *protogen.Log) Log {
log := Log{Timestamp: protoLog.Timestamp}
log.Fields = make([]Field, 0, len(protoLog.Fields))
for _, protoFiled := range protoLog.Fields {
log.Fields = append(log.Fields, Field{Key: protoFiled.Key, Value: protoFiled.Value})
}
return log
}
func fromProtoSpanInternal(protoSpan *protogen.Span) (*Span, error) {
span := &Span{
ServiceName: protoSpan.ServiceName,
OperationName: protoSpan.OperationName,
TraceID: protoSpan.TraceId,
SpanID: protoSpan.SpanId,
Env: protoSpan.Env,
ParentID: protoSpan.ParentId,
StartTime: timeFromTimestamp(protoSpan.StartTime),
Duration: durationFromDuration(protoSpan.Duration),
}
span.References = make([]SpanRef, 0, len(protoSpan.References))
for _, ref := range protoSpan.References {
span.References = append(span.References, convertSpanRef(ref))
}
span.Tags = make(map[string]interface{})
for _, tag := range protoSpan.Tags {
newTag := convertTag(tag)
span.Tags[newTag.Key] = newTag.Value
}
span.Logs = make([]Log, 0, len(protoSpan.Logs))
for _, log := range protoSpan.Logs {
span.Logs = append(span.Logs, convertLog(log))
}
return span, nil
}
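// Editor's sketch (not part of the original file): v2 tag values are decoded
// above as fixed-width big-endian data, so a test fixture for an int64 tag can
// be produced with a hypothetical helper mirroring unSerializeInt64.
func exampleSerializeInt64(v int64) []byte {
buf := make([]byte, 8)
binary.BigEndian.PutUint64(buf, uint64(v))
return buf
}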

View File

@@ -0,0 +1,29 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["clt.go"],
importpath = "go-common/app/service/main/dapper-query/pkg/cltclient",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = ["//library/sync/errgroup:go_default_library"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,113 @@
// Package cltclient provides fetching and merging of status data from collectors
package cltclient
import (
"context"
"encoding/json"
"fmt"
"net/http"
"sync"
"go-common/library/sync/errgroup"
)
const (
_jsonMime = "application/json"
)
// ClientStatusResp is the response of the client-status request, used for debugging only
type ClientStatusResp struct {
QueueLen int `json:"queue_len"`
Clients []*ClientStatus `json:"clients"`
}
// ClientStatus client status
type ClientStatus struct {
Addr string `json:"addr"`
UpTime int64 `json:"up_time"`
ErrCount int64 `json:"err_count"`
Rate int64 `json:"rate"`
}
// CltStatus collector status
type CltStatus struct {
Node string `json:"node"`
QueueLen int `json:"queue_len"`
Clients []*ClientStatus `json:"clients"`
}
// New collector client
func New(nodes []string, httpclient *http.Client) (*Client, error) {
if len(nodes) == 0 {
return nil, fmt.Errorf("no node provided")
}
if httpclient == nil {
httpclient = http.DefaultClient
}
return &Client{nodes: nodes, httpclient: httpclient}, nil
}
// Client collector client
type Client struct {
nodes []string
httpclient *http.Client
}
// Status return all collector status
func (c *Client) Status(ctx context.Context) ([]*CltStatus, error) {
var mx sync.Mutex
var g errgroup.Group
results := make(map[string]*ClientStatusResp)
for _, node := range c.nodes {
node := node // https://golang.org/doc/faq#closures_and_goroutines
g.Go(func() error {
resp, err := c.fetchStatus(ctx, node)
if err != nil {
return err
}
mx.Lock()
results[node] = resp
mx.Unlock()
return nil
})
}
if err := g.Wait(); err != nil {
return nil, err
}
clts := make([]*CltStatus, 0, len(results))
for node, resp := range results {
clts = append(clts, &CltStatus{
Node: node,
QueueLen: resp.QueueLen,
Clients: resp.Clients,
})
}
return clts, nil
}
func (c *Client) fetchStatus(ctx context.Context, node string) (*ClientStatusResp, error) {
var wrapResp struct {
Code int `json:"code"`
Message string `json:"message"`
Data ClientStatusResp `json:"data"`
}
reqURL := "http://" + node + "/x/internal/dapper-collector/client-status"
req, err := http.NewRequest(http.MethodGet, reqURL, nil)
if err != nil {
return nil, err
}
req.Header.Set("Accept", _jsonMime)
req = req.WithContext(ctx)
resp, err := c.httpclient.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode/100 != 2 {
p := make([]byte, 2048)
n, _ := resp.Body.Read(p)
return nil, fmt.Errorf("request url: %s status code: %d, body: %s", reqURL, resp.StatusCode, p[:n])
}
err = json.NewDecoder(resp.Body).Decode(&wrapResp)
return &wrapResp.Data, err
}
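// Editor's sketch (not part of the original file): minimal use of the client;
// the node address below is a placeholder, not a real collector endpoint.
func exampleCollectorStatus(ctx context.Context) error {
clt, err := New([]string{"127.0.0.1:6190"}, nil)
if err != nil {
return err
}
statuses, err := clt.Status(ctx)
if err != nil {
return err
}
for _, s := range statuses {
fmt.Printf("node=%s queue_len=%d clients=%d\n", s.Node, s.QueueLen, len(s.Clients))
}
return nil
}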

View File

@@ -0,0 +1,38 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_test",
"go_library",
)
go_test(
name = "go_default_test",
srcs = ["opslog_test.go"],
embed = [":go_default_library"],
rundir = ".",
tags = ["automanaged"],
)
go_library(
name = "go_default_library",
srcs = ["opslog.go"],
importpath = "go-common/app/service/main/dapper-query/pkg/opslog",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = ["//vendor/github.com/pkg/errors:go_default_library"],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,221 @@
// Package opslog provides the ops-log query API
package opslog
import (
"bytes"
"context"
"encoding/json"
"io"
"net/http"
"strconv"
"time"
"github.com/pkg/errors"
)
const (
_kbnVersion = "5.4.3"
_indexPrefix = "billions-"
_mqueryContentType = "application/x-ndjson"
_ajsSessioID = "_AJSESSIONID"
)
// Err errors
var (
ErrOverRange = errors.New("search time over range")
)
type response struct {
Hits struct {
Hits []struct {
Source map[string]interface{} `json:"_source"`
} `json:"hits"`
} `json:"hits"`
}
// Record represents a log record
type Record struct {
Time time.Time `json:"timestamp"`
Fields map[string]interface{} `json:"fields"`
Level string `json:"level"`
Message string `json:"message"`
}
// Client queries logs from ops-log
type Client interface {
Query(ctx context.Context, familys []string, traceID uint64, sessionID string, start, end int64, options ...Option) ([]*Record, error)
}
type option struct {
traceField string
size int
level string
}
var _defaultOpt = option{
traceField: "traceid",
size: 100,
}
// Option for query
type Option func(opt *option)
// SetTraceField default "traceid"
func SetTraceField(traceField string) Option {
return func(opt *option) {
opt.traceField = traceField
}
}
// SetSize default 100
func SetSize(size int) Option {
return func(opt *option) {
opt.size = size
}
}
// SetLevel return all if level is empty
func SetLevel(level string) Option {
return func(opt *option) {
opt.level = level
}
}
// New ops-log client
func New(searchAPI string, httpClient *http.Client) Client {
if httpClient == nil {
httpClient = http.DefaultClient
}
return &client{
searchAPI: searchAPI,
httpclient: httpClient,
}
}
type client struct {
searchAPI string
httpclient *http.Client
}
func (c *client) Query(ctx context.Context, familys []string, traceID uint64, sessionID string, start, end int64, options ...Option) ([]*Record, error) {
if start <= 0 || end <= 0 {
return nil, ErrOverRange
}
if len(familys) == 0 {
return make([]*Record, 0), nil
}
opt := _defaultOpt
for _, fn := range options {
fn(&opt)
}
req, err := c.newReq(familys, traceID, sessionID, start, end, &opt)
if err != nil {
return nil, err
}
resp, err := c.httpclient.Do(req)
if err != nil {
return nil, errors.Wrapf(err, "send request to %s fail", c.searchAPI)
}
defer resp.Body.Close()
if resp.StatusCode/100 != 2 {
buf := make([]byte, 1024)
n, _ := resp.Body.Read(buf)
return nil, errors.Errorf("ops-log response error: status_code: %d, body: %s", resp.StatusCode, buf[:n])
}
return decodeRecord(resp.Body)
}
func (c *client) newReq(familys []string, traceID uint64, sessionID string, start, end int64, opt *option) (*http.Request, error) {
prefixTraceID := strconv.FormatUint(traceID, 16)
leagcyTraceID := strconv.FormatUint(traceID, 10)
startMillis := start * int64((time.Second / time.Millisecond))
endMillis := end * int64((time.Second / time.Millisecond))
body := &bytes.Buffer{}
enc := json.NewEncoder(body)
header := map[string]interface{}{"index": formatIndices(familys), "ignore_unavailable": true}
if err := enc.Encode(header); err != nil {
return nil, err
}
shoulds := []map[string]interface{}{
{"prefix": map[string]interface{}{opt.traceField: prefixTraceID}},
{"match": map[string]interface{}{opt.traceField: leagcyTraceID}},
}
traceQuery := map[string]interface{}{"bool": map[string]interface{}{"should": shoulds}}
rangeQuery := map[string]interface{}{
"range": map[string]interface{}{
"@timestamp": map[string]interface{}{"gte": startMillis, "lte": endMillis, "format": "epoch_millis"},
},
}
musts := []map[string]interface{}{traceQuery, rangeQuery}
if opt.level != "" {
musts = append(musts, map[string]interface{}{"match": map[string]interface{}{"level": opt.level}})
}
query := map[string]interface{}{
"sort": map[string]interface{}{
"@timestamp": map[string]interface{}{
"order": "desc",
"unmapped_type": "boolean",
},
},
"query": map[string]interface{}{
"bool": map[string]interface{}{"must": musts},
},
"version": true,
"size": opt.size,
}
if err := enc.Encode(query); err != nil {
return nil, err
}
req, err := http.NewRequest(http.MethodPost, c.searchAPI, body)
if err != nil {
return nil, err
}
session := &http.Cookie{Name: _ajsSessioID, Value: sessionID}
req.AddCookie(session)
req.Header.Set("Content-Type", _mqueryContentType)
req.Header.Set("kbn-version", _kbnVersion)
return req, nil
}
func decodeRecord(src io.Reader) ([]*Record, error) {
var resp struct {
Responses []response `json:"responses"`
}
if err := json.NewDecoder(src).Decode(&resp); err != nil {
return nil, errors.Wrap(err, "decode response error")
}
if len(resp.Responses) == 0 {
return nil, nil
}
records := make([]*Record, 0, len(resp.Responses[0].Hits.Hits))
for _, hit := range resp.Responses[0].Hits.Hits {
record := &Record{
Fields: make(map[string]interface{}),
}
for k, v := range hit.Source {
switch k {
case "@timestamp":
s, _ := v.(string)
record.Time, _ = time.Parse(time.RFC3339Nano, s)
case "log":
s, _ := v.(string)
record.Message = s
case "level":
s, _ := v.(string)
record.Level = s
default:
record.Fields[k] = v
}
}
records = append(records, record)
}
return records, nil
}
func formatIndices(familys []string) []string {
indices := make([]string, len(familys))
for i := range familys {
indices[i] = _indexPrefix + familys[i] + "*"
}
return indices
}
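// Editor's note (illustrative, not part of the original file): newReq emits an
// Elasticsearch _msearch body as newline-delimited JSON, one header line
// followed by one query line, roughly:
//
//   {"index":["billions-main.web-svr.web-interface*"],"ignore_unavailable":true}
//   {"sort":{"@timestamp":{...}},"query":{"bool":{"must":[<traceid prefix/match>,<time range>]}},"version":true,"size":100}
//
// The trace ID is matched both as a hexadecimal prefix and as its legacy
// decimal form.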

View File

@@ -0,0 +1,68 @@
package opslog
import (
"bufio"
"context"
"fmt"
"net/http"
"net/http/httptest"
"os"
"strconv"
"strings"
"testing"
"time"
)
func TestOpsLog(t *testing.T) {
testSessionID := "c860e25e5360fc08888a3aaf8c7a0bec"
testResponse := `{"responses":[{"took":4,"timed_out":false,"_shards":{"total":8,"successful":8,"failed":0},"hits":{"total":2,"max_score":null,"hits":[{"_index":"billions-main.web-svr.web-interface-@2018.09.10-uat-1","_type":"logs","_id":"AWXBhTtRe-NhC44S955A","_version":1,"_score":null,"_source":{"@timestamp":"2018-09-10T03:27:34.42933Z","app_id":"main.web-svr.web-interface","args":"","env":"uat","error":"","instance_id":"web-interface-32096-758958f64f-k4gnc","ip":"172.22.35.133:9000","level":"INFO","level_value":1,"path":"/passport.service.identify.v1.Identify/GetCookieInfo","ret":0,"source":"go-common/library/net/rpc/warden.logging:195","stack":"\u003cnil\u003e","traceid":"2406767965117552819","ts":0.001041696,"user":"","zone":"sh001"},"fields":{"@timestamp":[1536550054429]},"highlight":{"traceid":["@kibana-highlighted-field@2406767965117552819@/kibana-highlighted-field@"]},"sort":[1536550054429]},{"_index":"billions-main.web-svr.web-interface-@2018.09.10-uat-1","_type":"logs","_id":"AWXBhTfFS1y0J6vacgAH","_version":1,"_score":null,"_source":{"@timestamp":"2018-09-10T03:27:34.429376Z","app_id":"main.web-svr.web-interface","env":"uat","err":"-101","instance_id":"web-interface-32096-758958f64f-k4gnc","ip":"10.23.50.21","level":"ERROR","level_value":3,"method":"GET","mid":null,"msg":"账号未登录","params":"","path":"/x/web-interface/nav","ret":-101,"source":"go-common/library/net/http/blademaster.Logger.func1:46","stack":"-101","traceid":"2406767965117552819","ts":0.001167299,"user":"no_user","zone":"sh001"},"fields":{"@timestamp":[1536550054429]},"highlight":{"traceid":["@kibana-highlighted-field@2406767965117552819@/kibana-highlighted-field@"]},"sort":[1536550054429]}]},"aggregations":{"2":{"buckets":[{"key_as_string":"2018-09-10T09:00:00.000+08:00","key":1536541200000,"doc_count":2}]}},"status":200}]}`
svr := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
session, err := r.Cookie(_ajsSessioID)
if err != nil {
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, "missing session cookie: %v", err)
return
}
if session.Value != testSessionID {
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, "invalid session id: %s", session.Value)
return
}
bufReader := bufio.NewReader(r.Body)
first, err := bufReader.ReadString('\n')
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
return
}
if !strings.Contains(first, "billions-main.web-svr.web-interface*") {
w.WriteHeader(http.StatusBadRequest)
fmt.Fprintf(w, "invalid familys: %s", first)
return
}
w.Write([]byte(testResponse))
}))
defer svr.Close()
client := New(svr.URL, nil)
end := time.Now().Unix()
start := end - 3600
familys := []string{"main.web-svr.web-interface"}
records, err := client.Query(context.Background(), familys, 8111326167741382285, testSessionID, start, end)
if err != nil {
t.Fatal(err)
}
for _, record := range records {
t.Logf("record: %v", record)
}
}
func TestOpsLogReal(t *testing.T) {
sessionID := os.Getenv("TEST_SESSION_ID")
if sessionID == "" {
t.Skipf("miss sessionID skip test")
}
traceID, _ := strconv.ParseUint("7b91b9a72f87c13", 16, 64)
client := New("http://uat-ops-log.bilibili.co/elasticsearch/_msearch", nil)
records, err := client.Query(context.Background(), []string{"main.community.tag"}, traceID, sessionID, 1545296000, 1545296286)
if err != nil {
t.Fatal(err)
}
for _, record := range records {
t.Logf("record: %v", record)
}
}

View File

@@ -0,0 +1,48 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = [
"depend.go",
"service.go",
"util.go",
],
importpath = "go-common/app/service/main/dapper-query/service",
tags = ["manual"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/dapper-query/api/v1:go_default_library",
"//app/service/main/dapper-query/conf:go_default_library",
"//app/service/main/dapper-query/dao:go_default_library",
"//app/service/main/dapper-query/model:go_default_library",
"//app/service/main/dapper-query/pkg/cltclient:go_default_library",
"//app/service/main/dapper-query/service/opslog:go_default_library",
"//library/ecode:go_default_library",
"//library/log:go_default_library",
"//library/sync/errgroup:go_default_library",
"@io_bazel_rules_go//proto/wkt:duration_go_proto",
"@io_bazel_rules_go//proto/wkt:timestamp_go_proto",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/main/dapper-query/service/opslog:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,103 @@
package service
import (
"context"
"sync"
"time"
"go-common/app/service/main/dapper-query/dao"
"go-common/app/service/main/dapper-query/model"
"go-common/library/log"
"go-common/library/sync/errgroup"
)
// timeOffset in seconds
var _timeOffset int64 = 1800
// DependItem describes one downstream dependency of a service
type DependItem struct {
ServiceName string
Component string
OperationName string
}
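// parseDependItem walks one trace to extract the direct downstream calls of
// serviceName. Editor's reading of the walk below: the service's own server
// span is the root; its non-server children are outgoing calls; when such a
// child has children of its own, the first of those (the callee's server span)
// supplies the dependent service name, otherwise the peer.service tag (or, for
// old SDKs, the span's service_name) is used.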
func parseDependItem(spans []*model.Span, serviceName, operationName string) (depends []DependItem) {
spanMap := make(map[uint64][]*model.Span)
var root *model.Span
for _, span := range spans {
if span.ServiceName == serviceName && span.IsServer() {
root = span
continue
}
spanMap[span.ParentID] = append(spanMap[span.ParentID], span)
}
if root == nil {
return
}
for _, span := range spanMap[root.SpanID] {
if span.IsServer() {
continue
}
if peerSpans, ok := spanMap[span.SpanID]; ok {
span = peerSpans[0]
depends = append(depends, DependItem{
Component: span.StringTag("component"),
ServiceName: span.ServiceName,
OperationName: span.OperationName,
})
} else {
peerService := span.StringTag("peer.service")
// with the old dapper SDK, services such as redis, memcache and mysql are stored as service_name rather than in peer.service
if peerService == "" {
peerService = span.ServiceName
}
depends = append(depends, DependItem{
Component: span.StringTag("component"),
ServiceName: peerService,
OperationName: span.OperationName,
})
}
}
return depends
}
func (s *service) fetchServiceDepend(ctx context.Context, serviceName string, operationNames []string) []DependItem {
var mx sync.Mutex
var depends []DependItem
appendDepend := func(items []DependItem) {
mx.Lock()
depends = append(depends, items...)
mx.Unlock()
}
end := time.Now().Unix() - _timeOffset
start := end - 3600
sel := &dao.Selector{Limit: 3, Start: start, End: end}
group := &errgroup.Group{}
for i := range operationNames {
operationName := operationNames[i]
group.Go(func() error {
refs, err := s.daoImpl.QuerySpanList(ctx, serviceName, operationName, sel, dao.TimeDesc)
if err != nil && err != dao.ErrNotFound {
if err != dao.ErrNotFound {
// only log error don't return error
log.Warn("query span list error: %s", err)
}
return nil
}
for _, ref := range refs {
spans, err := s.daoImpl.Trace(ctx, ref.TraceID)
if err != nil {
if err != dao.ErrNotFound {
// only log error don't return error
log.Warn("fetch trace span error: %s", err)
}
}
appendDepend(parseDependItem(spans, serviceName, operationName))
}
return nil
})
}
group.Wait()
return depends
}

View File

@@ -0,0 +1,38 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["opslog.go"],
importpath = "go-common/app/service/main/dapper-query/service/opslog",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/main/dapper-query/api/v1:go_default_library",
"//app/service/main/dapper-query/conf:go_default_library",
"//app/service/main/dapper-query/dao:go_default_library",
"//app/service/main/dapper-query/model:go_default_library",
"//app/service/main/dapper-query/pkg/opslog:go_default_library",
"//app/service/main/dapper-query/util:go_default_library",
"//library/ecode:go_default_library",
"//library/log:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,149 @@
package opslog
import (
"context"
"strconv"
"strings"
"time"
"go-common/library/ecode"
"go-common/library/log"
"go-common/app/service/main/dapper-query/api/v1"
"go-common/app/service/main/dapper-query/conf"
"go-common/app/service/main/dapper-query/dao"
"go-common/app/service/main/dapper-query/model"
"go-common/app/service/main/dapper-query/pkg/opslog"
"go-common/app/service/main/dapper-query/util"
)
// Service OpslogService
type Service struct {
daoImpl dao.Dao
client opslog.Client
}
// NewOpsLogService creates the opslog query service
func NewOpsLogService(cfg *conf.Config, d dao.Dao) *Service {
client := opslog.New(cfg.OpsLog.API, nil)
return &Service{daoImpl: d, client: client}
}
// OpsLog fetches ops-log data for a trace
func (s *Service) OpsLog(ctx context.Context, req *v1.OpsLogReq) (*v1.OpsLogReply, error) {
traceIDStr := strings.Split(req.TraceId, ":")[0]
traceID, err := strconv.ParseUint(traceIDStr, 16, 64)
if err != nil {
return nil, ecode.Error(ecode.RequestErr, err.Error())
}
var spanIDs []uint64
spanID, err := strconv.ParseUint(req.SpanId, 16, 64)
if err == nil {
spanIDs = append(spanIDs, spanID)
}
spans, err := s.daoImpl.Trace(ctx, traceID, spanIDs...)
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
if len(spans) != 0 {
return s.opsLogTraceExists(ctx, traceID, spanID, req.TraceField, spans)
}
return s.opslogFromGuess(ctx, traceID, req)
}
func (s *Service) opslogFromGuess(ctx context.Context, traceID uint64, req *v1.OpsLogReq) (*v1.OpsLogReply, error) {
if req.ServiceName == "" || req.OperationName == "" {
return nil, ecode.Error(ecode.RequestErr, "service_name and operation_name required")
}
if req.End == 0 {
req.End = time.Now().Unix()
}
if req.Start == 0 {
req.Start = req.End - 3600
}
// TODO: find a better way to query service-name dependencies
refs, err := s.daoImpl.QuerySpanList(ctx, req.ServiceName, req.OperationName, &dao.Selector{Start: req.Start, End: req.End, Limit: 3}, dao.TimeDesc)
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
serviceMap := make(map[string]struct{})
var spans []*model.Span
for _, ref := range refs {
if ref.IsError {
continue
}
spans, err = s.daoImpl.Trace(ctx, ref.TraceID)
if err != nil {
log.Warn("get trace %x error: %s", ref.TraceID, err)
}
for _, span := range spans {
serviceMap[span.ServiceName] = struct{}{}
}
}
serviceNames := make([]string, 0, len(serviceMap))
for key := range serviceMap {
serviceNames = append(serviceNames, key)
}
records, err := s.client.Query(ctx, serviceNames, traceID, util.SessionIDFromContext(ctx), req.Start, req.End)
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
return &v1.OpsLogReply{Records: toAPIRecords(records)}, nil
}
func (s *Service) opsLogTraceExists(ctx context.Context, traceID, spanID uint64, traceField string, spans []*model.Span) (*v1.OpsLogReply, error) {
var start, end int64
serviceMap := make(map[string]bool)
for _, span := range spans {
if start == 0 {
// start is set 300 seconds before the span start_time
start = span.StartTime.Unix() - 300
// end is set 300 seconds after the span start_time
end = start + 600
}
if !span.IsServer() {
continue
}
if spanID != 0 && span.SpanID != spanID {
continue
}
serviceMap[span.ServiceName] = true
}
serviceNames := make([]string, 0, len(serviceMap))
for key := range serviceMap {
serviceNames = append(serviceNames, key)
}
var options []opslog.Option
if traceField != "" {
options = append(options, opslog.SetTraceField(traceField))
}
records, err := s.client.Query(ctx, serviceNames, traceID, util.SessionIDFromContext(ctx), start, end, options...)
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
return &v1.OpsLogReply{Records: toAPIRecords(records)}, nil
}
func toAPIRecords(records []*opslog.Record) []*v1.OpsLogRecord {
apiRecords := make([]*v1.OpsLogRecord, 0, len(records))
for _, record := range records {
apiRecord := &v1.OpsLogRecord{
Time: record.Time.Format("2006-01-02T15:04:05.000"),
Level: record.Level,
Message: record.Message,
Fields: make(map[string]*v1.TagValue),
}
for k, v := range record.Fields {
switch val := v.(type) {
case string:
apiRecord.Fields[k] = &v1.TagValue{Value: &v1.TagValue_StringValue{StringValue: val}}
case bool:
apiRecord.Fields[k] = &v1.TagValue{Value: &v1.TagValue_BoolValue{BoolValue: val}}
case float64:
apiRecord.Fields[k] = &v1.TagValue{Value: &v1.TagValue_FloatValue{FloatValue: float32(val)}}
}
}
apiRecords = append(apiRecords, apiRecord)
}
return apiRecords
}

View File

@@ -0,0 +1,516 @@
package service
import (
"context"
"fmt"
"sort"
"strconv"
"strings"
"time"
"go-common/library/ecode"
"go-common/app/service/main/dapper-query/api/v1"
"go-common/app/service/main/dapper-query/conf"
"go-common/app/service/main/dapper-query/dao"
"go-common/app/service/main/dapper-query/model"
"go-common/app/service/main/dapper-query/pkg/cltclient"
"go-common/app/service/main/dapper-query/service/opslog"
"go-common/library/log"
)
const (
defaultTimeRange = 3600
)
type service struct {
opsLog *opslog.Service
daoImpl dao.Dao
clt *cltclient.Client
}
var _ v1.BMDapperQueryServer = &service{}
// New creates the DapperQueryService that implements DapperQueryServer
func New(cfg *conf.Config) (v1.DapperQueryServer, error) {
daoImpl, err := dao.New(cfg)
if err != nil {
return nil, err
}
clt, err := cltclient.New(cfg.Collectors.Nodes, nil)
if err != nil {
return nil, err
}
opsLog := opslog.NewOpsLogService(cfg, daoImpl)
return &service{daoImpl: daoImpl, clt: clt, opsLog: opsLog}, nil
}
// OpsLog fetches ops-log data for a trace
func (s *service) OpsLog(ctx context.Context, req *v1.OpsLogReq) (*v1.OpsLogReply, error) {
return s.opsLog.OpsLog(ctx, req)
}
func (s *service) ListServiceName(ctx context.Context, req *v1.ListServiceNameReq) (*v1.ListServiceNameReply, error) {
serviceNames, err := s.daoImpl.ServiceNames(ctx)
if err != nil {
return nil, err
}
reply := &v1.ListServiceNameReply{
ServiceNames: serviceNames,
}
return reply, nil
}
func (s *service) ListOperationName(ctx context.Context, req *v1.ListOperationNameReq) (*v1.ListOperationNameReply, error) {
serviceName := req.ServiceName
operationNames, err := s.daoImpl.OperationNames(ctx, serviceName)
if err != nil {
return nil, err
}
reply := &v1.ListOperationNameReply{OperationNames: operationNames}
return reply, nil
}
func (s *service) ListSpan(ctx context.Context, req *v1.ListSpanReq) (*v1.ListSpanReply, error) {
serviceName := req.ServiceName
operationName := req.OperationName
start := req.Start
end := req.End
// if start or end is not set, default the time range to the last hour
if start == 0 || end == 0 {
end = time.Now().Unix()
start = end - defaultTimeRange
}
order := req.Order
if order == "" {
order = dao.TimeDesc
}
if !s.daoImpl.SupportOrder(order) {
return nil, ecode.Errorf(ecode.RequestErr, "request order %s unsupport yet ㄟ( ▔, ▔ )ㄏ", order)
}
onlyError := req.OnlyError
offset := int(req.Offset)
limit := int(req.Limit)
if limit == 0 {
limit = 50
}
sel := &dao.Selector{Start: start, End: end, Offset: offset, Limit: limit, OnlyError: onlyError}
spanListRefs, err := s.daoImpl.QuerySpanList(ctx, serviceName, operationName, sel, order)
if err != nil {
log.Error("query span list error: %s", err.Error())
}
reply := &v1.ListSpanReply{Items: make([]*v1.SpanListItem, 0, len(spanListRefs))}
for _, spanListRef := range spanListRefs {
item, err := s.getSpanListItem(ctx, serviceName, spanListRef)
if err != nil {
log.Error("query span list item error: %s", err)
continue
}
reply.Items = append(reply.Items, item)
}
return reply, nil
}
func (s *service) getSpanListItem(ctx context.Context, serviceName string, spanListRef model.SpanListRef) (*v1.SpanListItem, error) {
spans, err := s.daoImpl.Trace(ctx, spanListRef.TraceID, spanListRef.SpanID)
if err != nil {
return nil, err
}
var currentSpan *model.Span
for _, span := range spans {
if span.SpanID == spanListRef.SpanID {
currentSpan = span
}
}
if currentSpan == nil {
return nil, fmt.Errorf("can't find span: %x in traceid: %x", spanListRef.SpanID, spanListRef.TraceID)
}
var mark string
if currentSpan.IsError() {
rangelog:
for _, log := range currentSpan.Logs {
for _, field := range log.Fields {
if field.Key == "message" {
mark = string(field.Value)
break rangelog
}
}
}
}
item := &v1.SpanListItem{
TraceId: spanListRef.TraceIDStr(),
SpanId: spanListRef.SpanIDStr(),
ParentId: strconv.FormatUint(currentSpan.ParentID, 16),
ServiceName: currentSpan.ServiceName,
OperationName: currentSpan.OperationName,
StartTime: currentSpan.StartTime.Format("2006-01-02T15:04:05.000"),
Duration: currentSpan.Duration.String(),
IsError: currentSpan.IsError(),
RegionZone: currentSpan.StringTag("region") + ":" + currentSpan.StringTag("zone"),
ContainerIp: currentSpan.StringTag("ip"),
Mark: mark,
Tags: toAPITags(currentSpan.Tags),
}
return item, nil
}
func (s *service) Trace(ctx context.Context, req *v1.TraceReq) (*v1.TraceReply, error) {
traceIDStr := strings.Split(req.TraceId, ":")[0]
traceID, err := strconv.ParseUint(traceIDStr, 16, 64)
if err != nil {
return nil, ecode.Errorf(ecode.RequestErr, "invalid traceID %s", req.TraceId)
}
spanID, _ := strconv.ParseUint(req.SpanId, 16, 64)
spans, err := s.daoImpl.Trace(ctx, traceID)
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
var apiSpans []*v1.Span
var root *v1.Span
compatibleLegacySpan(spans)
serviceMap := make(map[string]bool)
for _, span := range spans {
apiSpan := s.toAPISpan(span)
if root == nil && ((spanID == 0 && span.IsServer()) || (span.SpanID == spanID && span.IsServer())) {
root = apiSpan
}
apiSpans = append(apiSpans, apiSpan)
serviceMap[span.ServiceName] = true
if peerService := span.StringTag("peer.service"); peerService != "" {
serviceMap[peerService] = true
}
}
if root == nil {
// NOTE: return a reply with root=nil when no matching server span is found; this lets the web UI detect the case
return &v1.TraceReply{}, nil
}
root.Level = 1
parentMap := make(map[string][]*v1.Span)
for _, apiSpan := range apiSpans {
// skip root span
if apiSpan.SpanId == root.SpanId {
continue
}
parentMap[apiSpan.ParentId] = append(parentMap[apiSpan.ParentId], apiSpan)
}
maxLevel := setChilds(root, parentMap, root.Level)
reply := &v1.TraceReply{
Root: root,
SpanCount: int32(len(spans) - len(parentMap)),
MaxLevel: maxLevel,
ServiceCount: int32(len(serviceMap)),
}
return reply, nil
}
func (s *service) RawTrace(ctx context.Context, req *v1.RawTraceReq) (*v1.RawTraceReply, error) {
traceID, err := strconv.ParseUint(req.TraceId, 16, 64)
if err != nil {
return nil, ecode.Errorf(ecode.RequestErr, "invalid traceID %s", req.TraceId)
}
spans, err := s.daoImpl.Trace(ctx, traceID)
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
reply := &v1.RawTraceReply{
Items: make([]*v1.Span, 0, len(spans)),
}
for _, span := range spans {
reply.Items = append(reply.Items, s.toAPISpan(span))
}
sort.Slice(reply.Items, func(i, j int) bool {
return reply.Items[i].StartTime > reply.Items[j].StartTime
})
return reply, nil
}
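// toAPITags wraps raw span tag values into the TagValue oneof used by the API; values of any other type come back as empty TagValues.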
func toAPITags(tags map[string]interface{}) map[string]*v1.TagValue {
apiTags := make(map[string]*v1.TagValue)
for key, value := range tags {
apitag := &v1.TagValue{}
switch val := value.(type) {
case string:
apitag.Value = &v1.TagValue_StringValue{StringValue: val}
case int64:
apitag.Value = &v1.TagValue_Int64Value{Int64Value: val}
case bool:
apitag.Value = &v1.TagValue_BoolValue{BoolValue: val}
case float64:
apitag.Value = &v1.TagValue_FloatValue{FloatValue: float32(val)}
}
apiTags[key] = apitag
}
return apiTags
}
func (s *service) toAPISpan(span *model.Span) *v1.Span {
apiSpan := &v1.Span{
ServiceName: span.ServiceName,
OperationName: span.OperationName,
TraceId: span.TraceIDStr(),
SpanId: span.SpanIDStr(),
ParentId: span.ParentIDStr(),
StartTime: span.StartTime.UnixNano(),
Duration: int64(span.Duration),
}
for _, log := range span.Logs {
apilog := &v1.Log{Timestamp: log.Timestamp}
for _, field := range log.Fields {
apilog.Fields = append(apilog.Fields, &v1.Field{Key: field.Key, Value: string(field.Value)})
}
apiSpan.Logs = append(apiSpan.Logs, apilog)
}
apiSpan.Tags = toAPITags(span.Tags)
return apiSpan
}
// OperationNameRank queries the ranked OperationName list for a service
func (s *service) OperationNameRank(ctx context.Context, req *v1.OperationNameRankReq) (*v1.OperationNameRankReply, error) {
serviceName := req.ServiceName
start, end := req.Start, req.End
// if start or end not set, set time range to last hour
if start == 0 || end == 0 {
end = time.Now().Unix()
start = end - defaultTimeRange
}
rankType := req.RankType
if rankType != "" {
if !model.VerifyRankType(rankType) {
return nil, ecode.Errorf(ecode.RequestErr, "request rankType %s is not supported yet", rankType)
}
} else {
rankType = model.MaxDurationRank
}
values, err := s.daoImpl.MeanOperationNameField(ctx, map[string]string{
dao.ServiceNameTag: serviceName,
dao.SpanKindTag: "server",
}, rankType, start, end, []string{dao.OperationNameTag})
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
model.SortRank(values)
reply := &v1.OperationNameRankReply{RankType: rankType, Items: make([]*v1.RankItem, 0, len(values))}
for _, value := range values {
operationName := value.Tag[dao.OperationNameTag]
reply.Items = append(reply.Items, &v1.RankItem{
ServiceName: serviceName,
OperationName: operationName,
Value: value.Value,
})
}
return reply, nil
}
// DependsRank queries the ranked list of downstream dependencies for a service
func (s *service) DependsRank(ctx context.Context, req *v1.DependsRankReq) (*v1.DependsRankReply, error) {
serviceName := req.ServiceName
start, end := req.Start, req.End
// if start or end not set, set time range to last hour
if start == 0 || end == 0 {
end = time.Now().Unix()
start = end - defaultTimeRange
}
rankType := req.RankType
if rankType != "" {
if !model.VerifyRankType(rankType) {
return nil, ecode.Errorf(ecode.RequestErr, "request rankType %s is not supported yet", rankType)
}
} else {
rankType = model.MaxDurationRank
}
values, err := s.daoImpl.MeanOperationNameField(ctx, map[string]string{
dao.ServiceNameTag: serviceName,
dao.SpanKindTag: "client",
}, rankType, start, end, []string{dao.PeerServiceTag, dao.OperationNameTag})
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
reply := &v1.DependsRankReply{RankType: rankType, Items: make([]*v1.RankItem, 0, len(values))}
for _, value := range values {
operationName := value.Tag[dao.OperationNameTag]
serviceName := value.Tag[dao.PeerServiceTag]
reply.Items = append(reply.Items, &v1.RankItem{
ServiceName: serviceName,
OperationName: operationName,
Value: value.Value,
})
}
return reply, nil
}
// SpanSeries returns time-series data for the given service and operation name
func (s *service) SpanSeries(ctx context.Context, req *v1.SpanSeriesReq) (*v1.SpanSeriesReply, error) {
start, end := req.Start, req.End
// if start or end not set, set time range to last hour
if start == 0 || end == 0 {
end = time.Now().Unix()
start = end - defaultTimeRange
}
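// aim for roughly 120 buckets across the requested window, but never finer than 5 seconds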
interval := (end - start) / 120
if interval < 5 {
interval = 5
}
serviceName := req.ServiceName
operationName := req.OperationName
fields := strings.Split(req.Fields, ",")
seriesFn := s.daoImpl.SpanSeriesMean
// the error series is special-cased: errors are counted rather than averaged
if len(fields) == 1 && fields[0] == "errors" {
seriesFn = s.daoImpl.SpanSeriesCount
}
series, err := seriesFn(ctx, serviceName, operationName, fields, start, end, interval)
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
reply := &v1.SpanSeriesReply{Interval: int64(interval)}
reply.Times = make([]string, len(series.Timestamps))
for i, timestamp := range series.Timestamps {
formattedTime := time.Unix(timestamp/int64(time.Second), timestamp%int64(time.Second)).Format("2006-01-02T15:04:05")
reply.Times[i] = formattedTime
}
reply.Items = make([]*v1.SeriesItem, len(series.Items))
for i, item := range series.Items {
apiItem := &v1.SeriesItem{Field: item.Field, Values: make([]*int64, len(item.Rows))}
for j, val := range item.Rows {
if val == nil {
apiItem.Values[j] = nil
} else {
valInt64 := int64(*val)
apiItem.Values[j] = &valInt64
}
}
reply.Items[i] = apiItem
}
return reply, nil
}
func (s *service) SamplePoint(ctx context.Context, req *v1.SamplePointReq) (*v1.SamplePointReply, error) {
serviceName := req.ServiceName
operationName := req.OperationName
startTime, err := time.ParseInLocation("2006-01-02T15:04:05", req.Time, time.Local)
if err != nil {
return nil, ecode.Error(ecode.RequestErr, err.Error())
}
start, end := startTime.Unix()-req.Interval, startTime.Unix()+req.Interval
spanListRef, err := s.daoImpl.QuerySpanList(ctx, serviceName, operationName, &dao.Selector{Start: start, End: end, Limit: 50, OnlyError: req.OnlyError}, dao.TimeDesc)
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
reply := &v1.SamplePointReply{}
for _, ref := range spanListRef {
reply.Items = append(reply.Items, &v1.SamplePointItem{
TraceId: ref.TraceIDStr(),
SpanId: ref.SpanIDStr(),
Duration: ref.Duration,
IsError: ref.IsError,
})
}
return reply, nil
}
// CltStatus reports the status of the collector nodes and their connected clients
func (s *service) CltStatus(ctx context.Context, req *v1.CltStatusReq) (*v1.CltStatusReply, error) {
nodes, err := s.clt.Status(ctx)
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
reply := new(v1.CltStatusReply)
for _, node := range nodes {
apiNode := &v1.CltNode{
Node: node.Node,
QueueLen: int64(node.QueueLen),
}
for _, client := range node.Clients {
apiNode.Clients = append(apiNode.Clients, &v1.Client{
Addr: client.Addr,
ErrCount: client.ErrCount,
Rate: client.Rate,
UpTime: client.UpTime,
})
}
reply.Nodes = append(reply.Nodes, apiNode)
}
return reply, nil
}
// DependsTopology returns the dependency topology across all services
func (s *service) DependsTopology(ctx context.Context, req *v1.DependsTopologyReq) (*v1.DependsTopologyReply, error) {
serviceNames, err := s.daoImpl.ServiceNames(ctx)
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
reply := &v1.DependsTopologyReply{}
for _, serviceName := range serviceNames {
peerServices, err := s.daoImpl.PeerService(ctx, serviceName)
if err != nil {
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
log.V(10).Info("peerService: %+v", peerServices)
for _, peerService := range peerServices {
reply.Items = append(reply.Items, &v1.DependsTopologyItem{ServiceName: serviceName, DependOn: peerService})
}
}
return reply, nil
}
// ServiceDepend queries the dependencies of a single service
func (s *service) ServiceDepend(ctx context.Context, req *v1.ServiceDependReq) (*v1.ServiceDependReply, error) {
operationNames, err := s.daoImpl.OperationNames(ctx, req.ServiceName)
if err != nil {
if err == dao.ErrNotFound {
return nil, ecode.Errorf(ecode.NothingFound, "service %s does not exist", req.ServiceName)
}
return nil, ecode.Error(ecode.ServerErr, err.Error())
}
if req.OperationName != "" {
var matched bool
for _, operationName := range operationNames {
if operationName == req.OperationName {
operationNames = []string{operationName}
matched = true
break
}
}
if !matched {
return nil, ecode.Errorf(ecode.RequestErr, "operationName %s does not exist for service %s", req.OperationName, req.ServiceName)
}
}
items := s.fetchServiceDepend(ctx, req.ServiceName, operationNames)
return &v1.ServiceDependReply{Items: mergeDepends(items)}, nil
}
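// mergeDepends groups DependItems by (ServiceName, Component) and collects their operation names without duplicates.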
func mergeDepends(items []DependItem) []*v1.ServiceDependItem {
var apiItems []*v1.ServiceDependItem
apiItemsExists := func(serviceName, component string) (*v1.ServiceDependItem, bool) {
for _, apiItem := range apiItems {
if apiItem.ServiceName == serviceName && apiItem.Component == component {
return apiItem, true
}
}
return nil, false
}
addOperationName := func(apiItem *v1.ServiceDependItem, operationName string) {
for _, name := range apiItem.OperationNames {
if name == operationName {
return
}
}
apiItem.OperationNames = append(apiItem.OperationNames, operationName)
}
for _, item := range items {
if apiItem, ok := apiItemsExists(item.ServiceName, item.Component); ok {
addOperationName(apiItem, item.OperationName)
} else {
apiItems = append(apiItems, &v1.ServiceDependItem{
ServiceName: item.ServiceName,
Component: item.Component,
OperationNames: []string{item.OperationName},
})
}
}
return apiItems
}
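// exampleMergeDepends is an illustrative sketch added for documentation, not part of the original service: it assumes DependItem is the plain struct produced by fetchServiceDepend, with exported ServiceName, Component and OperationName fields, and the names below are made up.
func exampleMergeDepends() []*v1.ServiceDependItem {
items := []DependItem{
{ServiceName: "account.service", Component: "gorpc", OperationName: "/account/info"},
{ServiceName: "account.service", Component: "gorpc", OperationName: "/account/cards"},
{ServiceName: "account.service", Component: "gorpc", OperationName: "/account/info"},
}
// the duplicate "/account/info" entry is dropped, so the result is a single ServiceDependItem carrying two operation names
return mergeDepends(items)
}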

View File

@@ -0,0 +1,84 @@
package service
import (
"sort"
"time"
"github.com/golang/protobuf/ptypes/duration"
"github.com/golang/protobuf/ptypes/timestamp"
apiv1 "go-common/app/service/main/dapper-query/api/v1"
"go-common/app/service/main/dapper-query/model"
)
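// compatibleLegacySpan repairs legacy span pairs: older SDKs emitted the client and server side of one RPC with the same span_id, which breaks the parent/child tree. Whenever two spans share a span_id, fixParentID re-parents the server span under the client span so setChilds can still build the tree. It reports whether any span was rewritten.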
func compatibleLegacySpan(spans []*model.Span) bool {
var fixed bool
set := make(map[uint64]*model.Span)
for _, sp1 := range spans {
if sp2, ok := set[sp1.SpanID]; ok {
// don't short-circuit here: every legacy pair must be fixed, not only the first one
if fixParentID(sp1, sp2) {
fixed = true
}
delete(set, sp1.SpanID)
} else {
set[sp1.SpanID] = sp1
}
}
return fixed
}
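// fixParentID re-parents the server span of a legacy client/server pair under the client span; it reports false when the two spans are not exactly one client plus one server.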
func fixParentID(sp1, sp2 *model.Span) bool {
var client, server *model.Span
for _, sp := range []*model.Span{sp1, sp2} {
if sp.IsServer() {
server = sp
} else {
client = sp
}
}
if client == nil || server == nil {
return false
}
server.ParentID = client.SpanID
return true
}
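// setChilds attaches every span whose ParentId equals node.SpanId as a child of node, orders siblings by start time and recurses depth-first; level is the level assigned to node itself and the return value is the deepest level reached in the subtree.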
func setChilds(node *apiv1.Span, parentMap map[string][]*apiv1.Span, level int32) int32 {
spans, ok := parentMap[node.SpanId]
if !ok {
return level
}
level++
delete(parentMap, node.SpanId)
// legacy span pairs: the client and server span of one RPC share span_id and parent_span_id, so both land in the same bucket and get the same level
for _, span := range spans {
span.Level = level
node.Childs = append(node.Childs, span)
}
if node.Childs != nil {
sort.Slice(node.Childs, func(i, j int) bool {
iStartTime := node.Childs[i].StartTime
jStartTime := node.Childs[j].StartTime
return iStartTime < jStartTime
})
}
var newLevel int32
for _, cnode := range node.Childs {
if ret := setChilds(cnode, parentMap, level); ret > newLevel {
newLevel = ret
}
}
return newLevel
}
func protoTimestamp(t time.Time) *timestamp.Timestamp {
return &timestamp.Timestamp{
Seconds: t.Unix(),
Nanos: int32(t.Nanosecond()),
}
}
func protoDuration(d time.Duration) *duration.Duration {
return &duration.Duration{
Seconds: d.Nanoseconds() / int64(time.Second),
Nanos: int32(d.Nanoseconds() % int64(time.Second)),
}
}

View File

@@ -0,0 +1,32 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = ["sessionid.go"],
importpath = "go-common/app/service/main/dapper-query/util",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//library/ecode:go_default_library",
"//library/net/http/blademaster:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,42 @@
package util
import (
"context"
"go-common/library/ecode"
bm "go-common/library/net/http/blademaster"
)
const (
_ajSessionID = "_AJSESSIONID"
)
type sessionKeyT string
var sessionKey sessionKeyT = "sessionID"
// SessionIDMiddleware extracts the session ID from the request cookie and stores it on the context
func SessionIDMiddleware(c *bm.Context) {
cookie, err := c.Request.Cookie(_ajSessionID)
if err != nil {
c.JSON(nil, ecode.AccessDenied)
c.Abort()
return
}
c.Context = SessionIDWithContext(c.Context, cookie.Value)
}
// SessionIDFromContext gets the session ID from the context
func SessionIDFromContext(ctx context.Context) string {
if val := ctx.Value(sessionKey); val != nil {
if sessionID, ok := val.(string); ok {
return sessionID
}
}
return ""
}
// SessionIDWithContext returns a copy of ctx carrying the session ID
func SessionIDWithContext(ctx context.Context, sessionID string) context.Context {
return context.WithValue(ctx, sessionKey, sessionID)
}
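// Usage sketch (illustrative only, not part of this package): the middleware is meant to be registered before any handler that calls SessionIDFromContext. The blademaster wiring below is an assumption about how the HTTP server is set up elsewhere in this project and may differ from the real router code.
//
//	engine := bm.DefaultServer(nil)
//	engine.GET("/x/internal/dapper-query/ping", SessionIDMiddleware, func(c *bm.Context) {
//		c.JSON(SessionIDFromContext(c.Context), nil)
//	})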