Create & Init Project...

2019-04-22 18:49:16 +08:00
commit fc4fa37393
25440 changed files with 4054998 additions and 0 deletions

View File: app/service/ops/log-agent/output/lancergrpc/BUILD

@@ -0,0 +1,51 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = [
"aggr.go",
"config.go",
"lancer.go",
"pool.go",
],
importpath = "go-common/app/service/ops/log-agent/output/lancergrpc",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/ops/log-agent/event:go_default_library",
"//app/service/ops/log-agent/output:go_default_library",
"//app/service/ops/log-agent/output/cache/file:go_default_library",
"//app/service/ops/log-agent/output/lancergrpc/lancergateway:go_default_library",
"//app/service/ops/log-agent/pkg/bufio:go_default_library",
"//app/service/ops/log-agent/pkg/common:go_default_library",
"//app/service/ops/log-agent/pkg/flowmonitor:go_default_library",
"//app/service/ops/log-agent/pkg/lancermonitor:go_default_library",
"//library/log:go_default_library",
"//library/time:go_default_library",
"//vendor/github.com/BurntSushi/toml:go_default_library",
"@org_golang_google_grpc//:go_default_library",
"@org_golang_google_grpc//codes:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/service/ops/log-agent/output/lancergrpc/lancergateway:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File: app/service/ops/log-agent/output/lancergrpc/aggr.go

@@ -0,0 +1,63 @@
package lancergrpc
import (
"bytes"
"time"
"go-common/app/service/ops/log-agent/event"
)
const (
_logSeparator = byte('\u0001')
_logLancerHeaderLen = 19
)
// logAggr aggregates multiple logs into one payload, joined by _logSeparator
func (l *Lancer) logAggr(e *event.ProcessorEvent) (err error) {
l.logAggrBufLock.Lock()
buf := l.getlogAggrBuf(e.LogId)
buf.Write(e.Bytes())
buf.WriteByte(_logSeparator)
full := buf.Len() > l.c.AggrSize
l.logAggrBufLock.Unlock()
// the event bytes are already copied into the buffer, so recycle the event
event.PutEvent(e)
if full {
return l.flushLogAggr(e.LogId)
}
return nil
}
// getlogAggrBuf returns the aggregation buffer for logId; callers must hold logAggrBufLock
func (l *Lancer) getlogAggrBuf(logId string) *bytes.Buffer {
if _, ok := l.logAggrBuf[logId]; !ok {
l.logAggrBuf[logId] = new(bytes.Buffer)
}
return l.logAggrBuf[logId]
}
// flushLogAggr hands the aggregated logs of logId over to the send channel
func (l *Lancer) flushLogAggr(logId string) (err error) {
l.logAggrBufLock.Lock()
defer l.logAggrBufLock.Unlock()
buf := l.getlogAggrBuf(logId)
if buf.Len() > 0 {
logDoc := new(logDoc)
logDoc.b = make([]byte, buf.Len())
copy(logDoc.b, buf.Bytes())
logDoc.logId = logId
l.sendChan <- logDoc
}
buf.Reset()
return nil
}
// flushLogAggrPeriodically runs flushLogAggr periodically
func (l *Lancer) flushLogAggrPeriodically() {
tick := time.NewTicker(5 * time.Second)
defer tick.Stop()
for range tick.C {
// snapshot the keys under the lock, then flush outside the iteration
l.logAggrBufLock.Lock()
logIds := make([]string, 0, len(l.logAggrBuf))
for logId := range l.logAggrBuf {
logIds = append(logIds, logId)
}
l.logAggrBufLock.Unlock()
for _, logId := range logIds {
l.flushLogAggr(logId)
}
}
}
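For reference, the framing above is simple: each event is written followed by a single \u0001 byte, so one aggregated payload carries many events. A minimal standalone sketch of how a consumer could split such a payload back apart; the payload bytes are made up for illustration:

package main

import (
    "bytes"
    "fmt"
)

func main() {
    const sep = byte('\u0001') // same value as _logSeparator
    // hypothetical payload built the way logAggr builds it: event + separator, repeated
    payload := []byte("event-one\x01event-two\x01")
    // every event is terminated by sep, so the final split element is empty and skipped
    for _, ev := range bytes.Split(payload, []byte{sep}) {
        if len(ev) == 0 {
            continue
        }
        fmt.Printf("%s\n", ev)
    }
}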

View File: app/service/ops/log-agent/output/lancergrpc/config.go

@@ -0,0 +1,90 @@
package lancergrpc
import (
"errors"
"time"
"go-common/app/service/ops/log-agent/output/cache/file"
streamEvent "go-common/app/service/ops/log-agent/output/lancergrpc/lancergateway"
xtime "go-common/library/time"
"github.com/BurntSushi/toml"
)
type Config struct {
Local bool `toml:"local"`
Name string `toml:"name"`
AggrSize int `toml:"aggrSize"`
SendConcurrency int `toml:"sendConcurrency"`
CacheConfig *file.Config `toml:"cacheConfig"`
LancerGateway *streamEvent.Config `toml:"lancerGateway"`
SendBatchSize int `toml:"sendBatchSize"`
SendBatchNum int `toml:"sendBatchNum"`
SendBatchTimeout xtime.Duration `toml:"sendBatchTimeout"`
SendFlushInterval xtime.Duration `toml:"sendFlushInterval"`
InitialRetryDuration xtime.Duration `toml:"initialRetryDuration"`
MaxRetryDuration xtime.Duration `toml:"maxRetryDuration"`
}
func (c *Config) ConfigValidate() error {
if c == nil {
return errors.New("config of Lancer Output is nil")
}
if c.Name == "" {
return errors.New("output Name can't be empty")
}
}
if c.AggrSize == 0 {
c.AggrSize = 819200
}
if c.SendConcurrency == 0 {
c.SendConcurrency = 5
}
if err := c.CacheConfig.ConfigValidate(); err != nil {
return err
}
if c.SendFlushInterval == 0 {
c.SendFlushInterval = xtime.Duration(time.Second * 5)
}
if c.InitialRetryDuration == 0 {
c.InitialRetryDuration = xtime.Duration(time.Millisecond * 200)
}
if c.MaxRetryDuration == 0 {
c.MaxRetryDuration = xtime.Duration(time.Second * 2)
}
if c.SendBatchNum == 0 {
c.SendBatchNum = 3000
}
if c.SendBatchSize == 0 {
c.SendBatchSize = 1024 * 1024 * 10
}
if c.SendBatchTimeout == 0 {
c.SendBatchTimeout = xtime.Duration(time.Second * 5)
}
if c.LancerGateway == nil {
c.LancerGateway = &streamEvent.Config{}
}
if err := c.LancerGateway.ConfigValidate(); err != nil {
return err
}
return nil
}
func DecodeConfig(md toml.MetaData, primValue toml.Primitive) (c interface{}, err error) {
c = new(Config)
if err = md.PrimitiveDecode(primValue, c); err != nil {
return nil, err
}
return c, nil
}
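DecodeConfig is the second half of a two-phase TOML decode: the caller first parses the whole document into toml.Primitive sections, then this function does the typed decode, and ConfigValidate fills in defaults afterwards. A minimal sketch of that flow; the [lancergrpc] section name and the values are assumptions for illustration, not taken from this commit:

package main

import (
    "fmt"

    "github.com/BurntSushi/toml"
)

// outputConfig mirrors a couple of the Config fields above, enough for the sketch.
type outputConfig struct {
    Name     string `toml:"name"`
    AggrSize int    `toml:"aggrSize"`
}

func main() {
    // hypothetical config document
    const doc = `
[lancergrpc]
name = "lancer-ops-log"
aggrSize = 819200
`
    // first phase: parse into untyped sections
    var sections map[string]toml.Primitive
    md, err := toml.Decode(doc, &sections)
    if err != nil {
        panic(err)
    }
    // second phase: typed decode of one section, as DecodeConfig does
    c := new(outputConfig)
    if err = md.PrimitiveDecode(sections["lancergrpc"], c); err != nil {
        panic(err)
    }
    fmt.Printf("%+v\n", c) // &{Name:lancer-ops-log AggrSize:819200}
}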

View File: app/service/ops/log-agent/output/lancergrpc/lancer.go

@@ -0,0 +1,267 @@
package lancergrpc
import (
"context"
"fmt"
"bytes"
"sync"
"strconv"
"time"
"math"
"go-common/app/service/ops/log-agent/event"
"go-common/app/service/ops/log-agent/output"
"go-common/app/service/ops/log-agent/pkg/flowmonitor"
"go-common/app/service/ops/log-agent/pkg/common"
"go-common/app/service/ops/log-agent/output/cache/file"
"go-common/library/log"
"go-common/app/service/ops/log-agent/pkg/lancermonitor"
"google.golang.org/grpc"
"google.golang.org/grpc/codes"
"go-common/app/service/ops/log-agent/output/lancergrpc/lancergateway"
)
const (
_appIdKey = `"app_id":`
_levelKey = `"level":`
_logTime = `"time":`
)
var (
logMagic = []byte{0xAC, 0xBE}
logMagicBuf = []byte{0xAC, 0xBE}
_logType = []byte{0, 1}
_logLength = []byte{0, 0, 0, 0}
local, _ = time.LoadLocation("Local")
)
type logDoc struct {
b []byte
logId string
}
func init() {
err := output.Register("lancergrpc", NewLancer)
if err != nil {
panic(err)
}
}
type Lancer struct {
c *Config
next chan string
i chan *event.ProcessorEvent
cache *file.FileCache
logAggrBuf map[string]*bytes.Buffer
logAggrBufLock sync.Mutex
sendChan chan *logDoc
lancerClient lancergateway.Gateway2ServerClient
ctx context.Context
cancel context.CancelFunc
}
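// NewLancer creates a Lancer output from the given config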
func NewLancer(ctx context.Context, config interface{}) (output.Output, error) {
var err error
lancer := new(Lancer)
if c, ok := config.(*Config); !ok {
return nil, fmt.Errorf("Error config for Lancer output")
} else {
if err = c.ConfigValidate(); err != nil {
return nil, err
}
lancer.c = c
}
if output.OutputRunning(lancer.c.Name) {
return nil, fmt.Errorf("Output %s already running", lancer.c.Name)
}
lancer.i = make(chan *event.ProcessorEvent)
lancer.next = make(chan string, 1)
lancer.logAggrBuf = make(map[string]*bytes.Buffer)
lancer.sendChan = make(chan *logDoc)
cache, err := file.NewFileCache(lancer.c.CacheConfig)
if err != nil {
return nil, err
}
lancer.cache = cache
lancer.lancerClient, err = lancergateway.NewClient(lancer.c.LancerGateway)
if err != nil {
return nil, err
}
lancer.ctx, lancer.cancel = context.WithCancel(ctx)
return lancer, nil
}
func (l *Lancer) InputChan() chan *event.ProcessorEvent {
return l.i
}
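// Run starts the processor reader, the cache consumer, the periodic aggregation flusher and the sender goroutines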
func (l *Lancer) Run() (err error) {
go l.readFromProcessor()
go l.consumeCache()
go l.flushLogAggrPeriodically()
for i := 0; i < l.c.SendConcurrency; i++ {
go l.sendToLancer()
}
if l.c.Name != "" {
output.RegisterOutput(l.c.Name, l)
}
return nil
}
func (l *Lancer) Stop() {
l.cancel()
}
func (l *Lancer) readFromProcessor() {
for e := range l.i {
// only cache for sock input
if e.Source == "sock" {
l.cache.WriteToCache(e)
continue
}
// without cache
l.preWriteToLancer(e)
}
}
func (l *Lancer) preWriteToLancer(e *event.ProcessorEvent) {
flowmonitor.Fm.AddEvent(e, "log-agent.output.lancer", "OK", "write to lancer")
lancermonitor.IncreaseLogCount("agent.send.success.count", e.LogId)
if l.c.Name == "lancer-ops-log" {
l.logAggr(e)
} else {
l.sendLogDirectToLancer(e)
}
}
// consumeCache consumes logs from the cache
func (l *Lancer) consumeCache() {
for {
e := l.cache.ReadFromCache()
if e.Length < _logLancerHeaderLen {
event.PutEvent(e)
continue
}
// monitor should be called before event recycle
l.parseOpslog(e)
l.preWriteToLancer(e)
}
}
func (l *Lancer) parseOpslog(e *event.ProcessorEvent) {
if l.c.Name == "lancer-ops-log" {
e.AppId, _ = common.SeekValue([]byte(_appIdKey), e.Bytes())
if timeValue, err := common.SeekValue([]byte(_logTime), e.Bytes()); err == nil {
if len(timeValue) >= 19 {
// parse time
var t time.Time
if t, err = time.Parse(time.RFC3339Nano, string(timeValue)); err != nil {
if t, err = time.ParseInLocation("2006-01-02T15:04:05", string(timeValue), local); err != nil {
t, _ = time.ParseInLocation("2006-01-02T15:04:05", string(timeValue[0:19]), local)
}
}
if !t.IsZero() {
e.TimeRangeKey = strconv.FormatInt(t.Unix()/100*100, 10)
}
}
}
}
}
// sendLogDirectToLancer sends a log directly to lancer without aggregation
func (l *Lancer) sendLogDirectToLancer(e *event.ProcessorEvent) {
logDoc := new(logDoc)
logDoc.b = make([]byte, e.Length)
copy(logDoc.b, e.Bytes())
logDoc.logId = e.LogId
event.PutEvent(e)
l.sendChan <- logDoc
}
func (l *Lancer) nextRetry(retry int) time.Duration {
// avoid d too large
if retry > 10 {
return time.Duration(l.c.MaxRetryDuration)
}
d := time.Duration(math.Pow(2, float64(retry))) * time.Duration(l.c.InitialRetryDuration)
if d > time.Duration(l.c.MaxRetryDuration) {
return time.Duration(l.c.MaxRetryDuration)
}
return d
}
func (l *Lancer) bulkSendToLancerWithRetry(in *lancergateway.EventList) {
retry := 0
for {
// cancel the timeout context after the call so it doesn't leak across retries
ctx, cancel := context.WithTimeout(context.Background(), time.Duration(l.c.SendBatchTimeout))
t1 := time.Now()
resp, err := l.lancerClient.SendList(ctx, in)
cancel()
if err == nil {
if resp.Code == lancergateway.StatusCode_SUCCESS {
log.Info("get 200 from lancer gateway: size %d, count %d, cost %s", in.Size(), len(in.Events), time.Since(t1).String())
return
}
flowmonitor.Fm.Add("log-agent", "log-agent.output.lancer", "", "ERROR", fmt.Sprintf("write to lancer non-200: %s", resp.Code))
log.Warn("get non-200 from lancer gateway, retry: %s", resp.Code)
}
if err != nil {
switch grpc.Code(err) {
case codes.Canceled, codes.DeadlineExceeded, codes.Unavailable, codes.ResourceExhausted:
flowmonitor.Fm.Add("log-agent", "log-agent.output.lancer", "", "ERROR", fmt.Sprintf("write to lancer failed, retry: %s", err))
log.Warn("get error from lancer gateway, retry: %s", err)
default:
flowmonitor.Fm.Add("log-agent", "log-agent.output.lancer", "", "ERROR", fmt.Sprintf("write to lancer failed, no retry: %s", err))
log.Warn("get error from lancer gateway, no retry: %s", err)
return
}
}
time.Sleep(l.nextRetry(retry))
retry++
}
}
// sendToLancer batches logDocs from sendChan and flushes them to lancer by size, count or interval
func (l *Lancer) sendToLancer() {
eventList := new(lancergateway.EventList)
eventListLock := sync.Mutex{}
lastSend := time.Now()
ticker := time.Tick(time.Second * 1)
size := 0
for {
select {
case <-ticker:
if lastSend.Add(time.Duration(l.c.SendFlushInterval)).Before(time.Now()) && len(eventList.Events) > 0 {
eventListLock.Lock()
l.bulkSendToLancerWithRetry(eventList)
eventList.Reset()
size = 0
eventListLock.Unlock()
lastSend = time.Now()
}
case logDoc := <-l.sendChan:
// named ev to avoid shadowing the imported event package
ev := new(lancergateway.SimpleEvent)
ev.LogId = logDoc.logId
ev.Header = map[string]string{"timestamp": strconv.FormatInt(time.Now().Unix()/100*100, 10)}
ev.Data = logDoc.b
size += len(ev.Data)
eventListLock.Lock()
eventList.Events = append(eventList.Events, ev)
if size > l.c.SendBatchSize || len(eventList.Events) > l.c.SendBatchNum {
l.bulkSendToLancerWithRetry(eventList)
eventList.Reset()
size = 0
lastSend = time.Now()
}
eventListLock.Unlock()
}
}
}
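nextRetry above is a capped exponential backoff: the wait doubles from InitialRetryDuration on each attempt and is clamped to MaxRetryDuration, with the retry > 10 guard keeping the 2^retry term from growing without bound. A standalone sketch of the schedule using the defaults from config.go (200ms initial, 2s max):

package main

import (
    "fmt"
    "math"
    "time"
)

func main() {
    initial := 200 * time.Millisecond // InitialRetryDuration default
    max := 2 * time.Second            // MaxRetryDuration default
    for retry := 0; retry < 6; retry++ {
        d := time.Duration(math.Pow(2, float64(retry))) * initial
        if retry > 10 || d > max {
            d = max
        }
        // prints 200ms, 400ms, 800ms, 1.6s, 2s, 2s
        fmt.Printf("retry %d -> wait %s\n", retry, d)
    }
}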

View File: app/service/ops/log-agent/output/lancergrpc/lancergateway/BUILD

@@ -0,0 +1,60 @@
load(
"@io_bazel_rules_go//proto:def.bzl",
"go_proto_library",
)
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
proto_library(
name = "StreamEvent_proto",
srcs = ["StreamEvent.proto"],
tags = ["automanaged"],
deps = ["@gogo_special_proto//github.com/gogo/protobuf/gogoproto"],
)
go_proto_library(
name = "StreamEvent_go_proto",
compilers = ["@io_bazel_rules_go//proto:gogofast_grpc"],
importpath = "go-common/app/service/ops/log-agent/output/lancergrpc/lancergateway",
proto = ":StreamEvent_proto",
tags = ["automanaged"],
deps = ["@com_github_gogo_protobuf//gogoproto:go_default_library"],
)
go_library(
name = "go_default_library",
srcs = ["client.go"],
embed = [":StreamEvent_go_proto"],
importpath = "go-common/app/service/ops/log-agent/output/lancergrpc/lancergateway",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//library/naming/discovery:go_default_library",
"//library/net/rpc/warden/balancer/wrr:go_default_library",
"//library/net/rpc/warden/resolver:go_default_library",
"//library/time:go_default_library",
"@com_github_gogo_protobuf//gogoproto:go_default_library",
"@com_github_gogo_protobuf//proto:go_default_library",
"@org_golang_google_grpc//:go_default_library",
"@org_golang_x_net//context:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

File diff suppressed because it is too large

View File: app/service/ops/log-agent/output/lancergrpc/lancergateway/StreamEvent.proto

@@ -0,0 +1,36 @@
syntax = "proto3";
import "github.com/gogo/protobuf/gogoproto/gogo.proto";
option java_multiple_files = true;
option java_package = "com.bilibili.gateway2.common.protobuf";
service Gateway2Server {
rpc sendList (EventList) returns (Response) {
}
rpc send (SimpleEvent) returns (Response) {
}
}
enum StatusCode {
NULL = 0;
SUCCESS = 200;
LOAD_FULL = 429;
}
message SimpleEvent {
string logId = 1;
string outerId = 2;
map<string, string> header = 3;
bytes data = 4;
}
message EventList {
repeated SimpleEvent events = 1;
}
message Response {
StatusCode code = 1;
string msg = 2;
}

View File: app/service/ops/log-agent/output/lancergrpc/lancergateway/client.go

@@ -0,0 +1,61 @@
package lancergateway
import (
"time"
"errors"
"fmt"
"go-common/library/net/rpc/warden/resolver"
"go-common/library/net/rpc/warden/balancer/wrr"
"go-common/library/naming/discovery"
xtime "go-common/library/time"
"google.golang.org/grpc"
)
type Config struct {
AppId string `toml:"appId"`
Timeout xtime.Duration `toml:"timeout"`
Subset int `toml:"subset"`
}
func (c *Config) ConfigValidate() error {
if c == nil {
return errors.New("config of LancerGateway can't be nil")
}
if c.AppId == "" {
c.AppId = "datacenter.lancer.gateway2-server"
}
if c.Timeout == 0 {
c.Timeout = xtime.Duration(time.Second * 5)
}
if c.Subset == 0 {
c.Subset = 5
}
return nil
}
func init() {
resolver.Register(discovery.Builder())
}
// NewClient creates a grpc client to the lancer gateway via discovery
func NewClient(c *Config) (Gateway2ServerClient, error) {
opts := []grpc.DialOption{
grpc.WithInsecure(),
grpc.WithBalancerName(wrr.Name),
}
if c.Timeout != 0 {
opts = append(opts, grpc.WithTimeout(time.Duration(c.Timeout)))
}
conn, err := grpc.Dial(fmt.Sprintf("discovery://default/%s?subset=%d", c.AppId, c.Subset), opts...)
if err != nil {
return nil, err
}
return NewGateway2ServerClient(conn), nil
}
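A minimal usage sketch for this client, assuming it is compiled inside the go-common tree and a discovery service is reachable; ConfigValidate fills in the default appId (datacenter.lancer.gateway2-server) before dialing, and the logId and payload below are made up:

package main

import (
    "context"
    "fmt"
    "time"

    "go-common/app/service/ops/log-agent/output/lancergrpc/lancergateway"
)

func main() {
    cfg := &lancergateway.Config{}
    if err := cfg.ConfigValidate(); err != nil { // applies default appId, timeout, subset
        panic(err)
    }
    cli, err := lancergateway.NewClient(cfg)
    if err != nil {
        panic(err)
    }
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()
    resp, err := cli.SendList(ctx, &lancergateway.EventList{
        Events: []*lancergateway.SimpleEvent{
            {LogId: "000123", Data: []byte("hello lancer")}, // hypothetical logId and payload
        },
    })
    if err != nil {
        panic(err)
    }
    fmt.Println(resp.Code, resp.Msg)
}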

View File

@@ -0,0 +1,135 @@
#!/bin/bash
DEFAULT_PROTOC_GEN="gogofast"
DEFAULT_PROTOC="protoc"
GO_COMMON_DIR_NAME="go-common"
USR_INCLUDE_DIR="/usr/local/include"
function _install_protoc() {
osname=$(uname -s)
echo "install protoc ..."
case $osname in
"Darwin" )
brew install protobuf
;;
*)
echo "unknown operating system, need install protobuf manual see: https://developers.google.com/protocol-buffers"
exit 1
;;
esac
}
function _install_protoc_gen() {
local protoc_gen=$1
case $protoc_gen in
"gofast" )
echo "install gofast from github.com/gogo/protobuf/protoc-gen-gofast"
go get github.com/gogo/protobuf/protoc-gen-gofast
;;
"gogofast" )
echo "install gogofast from github.com/gogo/protobuf/protoc-gen-gogofast"
go get github.com/gogo/protobuf/protoc-gen-gogofast
;;
"gogo" )
echo "install gogo from github.com/gogo/protobuf/protoc-gen-gogo"
go get github.com/gogo/protobuf/protoc-gen-gogo
;;
"go" )
echo "install protoc-gen-go from github.com/golang/protobuf"
go get github.com/golang/protobuf/{proto,protoc-gen-go}
;;
*)
echo "can't install protoc-gen-${protoc_gen} automatic !"
exit 1;
;;
esac
}
function _find_go_common_dir() {
local go_common_dir_name=$1
local current_dir=$(pwd)
while [[ "$(basename $current_dir)" != "$go_common_dir_name" ]]; do
current_dir=$(dirname $current_dir)
if [[ "$current_dir" == "/" || "$current_dir" == "." || -z "$current_dir" ]]; then
return 1
fi
done
echo $current_dir
}
function _fix_pb_file() {
local target_dir=$1
echo "fix pb file"
local pb_files=$(find $target_dir -name "*.pb.go" -type f)
local pkg_name_esc=$(echo "$target_dir" | sed 's_/_\\/_g')
for file in $pb_files; do
echo "fix pb file $file"
if [[ $(uname -s) == 'Darwin' ]]; then
sed -i "" -e "s/^import \(.*\) \"app\/\(.*\)\"/import \1 \"go-common\/app\/\2\"/g" $file
else
sed -i"" -E "s/^import\s*(.*)\s*\"app\/(.*)\"/import\1\"go-common\/app\/\2\"/g" $file
fi
done
}
function _esc_string() {
echo $(echo "$1" | sed 's_/_\\/_g')
}
function _run_protoc() {
local proto_dir=$1
local proto_files=$(find $proto_dir -maxdepth 1 -name "*.proto")
if [[ -z $proto_files ]]; then
return
fi
local protoc_cmd="$PROTOC -I$PROTO_PATH --${PROTOC_GEN}_out=plugins=grpc:. ${proto_files}"
echo $protoc_cmd
$protoc_cmd
}
if [[ -z $PROTOC ]]; then
PROTOC=${DEFAULT_PROTOC}
which $PROTOC
if [[ "$?" -ne "0" ]]; then
_install_protoc
fi
fi
if [[ -z $PROTOC_GEN ]]; then
PROTOC_GEN=${DEFAULT_PROTOC_GEN}
which protoc-gen-$PROTOC_GEN
if [[ "$?" -ne "0" ]]; then
_install_protoc_gen $PROTOC_GEN
fi
fi
GO_COMMON_DIR=$(_find_go_common_dir $GO_COMMON_DIR_NAME)
if [[ "$?" != "0" ]]; then
echo "can't find go-common directoy"
exit 1
fi
if [[ -z $PROTO_PATH ]]; then
PROTO_PATH=$GO_COMMON_DIR:$GO_COMMON_DIR/vendor:$USR_INCLUDE_DIR
else
PROTO_PATH=$PROTO_PATH:$GO_COMMON_DIR:$GO_COMMON_DIR/vendor:$USR_INCLUDE_DIR
fi
if [[ ! -z $1 ]]; then
cd $1
fi
TARGET_DIR=$(pwd)
GO_COMMON_DIR_ESC=$(_esc_string "$GO_COMMON_DIR/")
TARGET_DIR=${TARGET_DIR//$GO_COMMON_DIR_ESC/}
# switch to go_common
cd $GO_COMMON_DIR
DIRS=$(find $TARGET_DIR -type d)
for dir in $DIRS; do
echo "run protoc in $dir"
_run_protoc $dir
done
_fix_pb_file $TARGET_DIR
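For reference, a typical invocation, assuming the script is saved as gen.sh somewhere under the go-common tree (its actual path is not shown in this diff): running PROTOC_GEN=gogofast ./gen.sh app/service/ops/log-agent/output/lancergrpc/lancergateway regenerates the *.pb.go files for every directory under that path and then rewrites their intra-repo imports to the go-common/ prefix. Without an argument it operates on the current directory, and the PROTOC and PROTO_PATH environment variables override the protoc binary and include path.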

View File: app/service/ops/log-agent/output/lancergrpc/pool.go

@@ -0,0 +1,360 @@
package lancergrpc
import (
"net"
"time"
"sync"
"errors"
"math/rand"
"expvar"
"go-common/library/log"
"go-common/app/service/ops/log-agent/pkg/bufio"
)
var (
ErrAddrListNil = errors.New("addrList can't be nil")
ErrPoolSize = errors.New("pool size should be no greater than the length of the addr list")
)
type LancerBufConn struct {
conn net.Conn
wr *bufio.Writer
enabled bool
ctime time.Time
}
type connPool struct {
c *ConnPoolConfig
invalidUpstreams map[string]interface{}
invalidUpstreamsLock sync.RWMutex
validBufConnChan chan *LancerBufConn
invalidBufConnChan chan *LancerBufConn
connCounter map[string]int
connCounterLock sync.RWMutex
newConnLock sync.Mutex
}
type ConnPoolConfig struct {
Name string `toml:"name"`
AddrList []string `toml:"addrList"`
DialTimeout time.Duration `toml:"dialTimeout"`
IdleTimeout time.Duration `toml:"idleTimeout"`
BufSize int `toml:"bufSize"`
PoolSize int `toml:"poolSize"`
}
func (c *ConnPoolConfig) ConfigValidate() error {
if c == nil {
return errors.New("Config of pool is nil")
}
if len(c.AddrList) == 0 {
return errors.New("pool addr list can't be empty")
}
if c.DialTimeout.Seconds() == 0 {
c.DialTimeout = time.Second * 5
}
if c.IdleTimeout.Seconds() == 0 {
c.IdleTimeout = time.Minute * 15
}
if c.BufSize == 0 {
c.BufSize = 1024 * 1024 * 2 // 2M by default
}
if c.PoolSize == 0 {
c.PoolSize = len(c.AddrList)
}
return nil
}
// newConn makes a connection to lancer, retrying random valid upstreams until one succeeds
func (cp *connPool) newConn() (conn net.Conn, err error) {
cp.newConnLock.Lock()
defer cp.newConnLock.Unlock()
for {
if addr, err := cp.randomOneUpstream(); err == nil {
if conn, err := net.DialTimeout("tcp", addr, cp.c.DialTimeout); err == nil && conn != nil {
log.Info("connect to %s success", addr)
cp.connCounterAdd(addr)
return conn, nil
} else {
cp.markUpstreamInvalid(addr)
continue
}
} else {
return nil, err
}
}
}
// newBufConn creates a buffered conn bound to a connection (enabled only if the dial succeeded)
func (cp *connPool) newBufConn() (bufConn *LancerBufConn, err error) {
bufConn = new(LancerBufConn)
bufConn.wr = bufio.NewWriterSize(nil, cp.c.BufSize)
if err := cp.setConn(bufConn); err == nil {
bufConn.enabled = true
} else {
bufConn.enabled = false
}
return bufConn, nil
}
// flushBufConn flushes the buffered conns periodically
func (cp *connPool) flushBufConn() {
for {
bufConn, _ := cp.getBufConn()
bufConn.conn.SetWriteDeadline(time.Now().Add(time.Second * 5))
if err := bufConn.wr.Flush(); err != nil {
log.Error("Error when flush to %s: %s", bufConn.conn.RemoteAddr().String(), err)
bufConn.enabled = false
}
cp.putBufConn(bufConn)
time.Sleep(time.Second * 5)
}
}
// initConnPool initializes the conn pool and starts its maintenance goroutines
func initConnPool(c *ConnPoolConfig) (cp *connPool, err error) {
if err = c.ConfigValidate(); err != nil {
return nil, err
}
if len(c.AddrList) == 0 {
return nil, ErrAddrListNil
}
if c.PoolSize > len(c.AddrList) {
return nil, ErrPoolSize
}
rand.Seed(time.Now().Unix())
cp = new(connPool)
cp.c = c
cp.validBufConnChan = make(chan *LancerBufConn, cp.c.PoolSize)
cp.invalidBufConnChan = make(chan *LancerBufConn, cp.c.PoolSize)
cp.invalidUpstreams = make(map[string]interface{})
cp.connCounter = make(map[string]int)
cp.initPool()
go cp.maintainUpstream()
go cp.flushBufConn()
go cp.maintainBufConnPool()
expvar.Publish("conn_pool"+cp.c.Name, expvar.Func(cp.connPoolStatus))
return cp, nil
}
// connableUpstreams returns the upstreams that are valid and not yet connected
func (cp *connPool) connableUpstreams() []string {
list := make([]string, 0)
cp.invalidUpstreamsLock.RLock()
defer cp.invalidUpstreamsLock.RUnlock()
// connCounter is read below, so take its lock as well
cp.connCounterLock.RLock()
defer cp.connCounterLock.RUnlock()
for _, addr := range cp.c.AddrList {
if _, ok := cp.invalidUpstreams[addr]; !ok {
if count, ok := cp.connCounter[addr]; ok && count == 0 {
list = append(list, addr)
}
}
}
return list
}
// write writes p to the buffered writer, refreshing the write deadline
func (bc *LancerBufConn) write(p []byte) (int, error) {
bc.conn.SetWriteDeadline(time.Now().Add(time.Second * 5))
return bc.wr.Write(p)
}
// randomOneUpstream returns one random upstream that can accept a new connection
func (cp *connPool) randomOneUpstream() (s string, err error) {
list := cp.connableUpstreams()
if len(list) == 0 {
err = errors.New("No valid upstreams")
return
}
return list[rand.Intn(len(list))], nil
}
// initPool creates PoolSize bufConns and puts them into the pool
func (cp *connPool) initPool() {
for _, addr := range cp.c.AddrList {
cp.connCounter[addr] = 0
}
for i := 0; i < cp.c.PoolSize; i++ {
if bufConn, err := cp.newBufConn(); err == nil {
cp.putBufConn(bufConn)
}
}
}
// novalidUpstream checks whether there is no valid upstream left
func (cp *connPool) novalidUpstream() bool {
return len(cp.invalidUpstreams) == len(cp.c.AddrList)
}
// getBufConn takes an enabled BufConn from the pool
func (cp *connPool) getBufConn() (*LancerBufConn, error) {
for {
select {
case bufConn := <-cp.validBufConnChan:
if !bufConn.enabled {
cp.putInvalidBufConn(bufConn)
continue
}
return bufConn, nil
case <-time.After(10 * time.Second):
log.Warn("timeout when get conn from conn pool")
continue
}
}
}
// setConn binds a new conn to the bufConn, closing and de-accounting the old one
func (cp *connPool) setConn(bufConn *LancerBufConn) error {
if bufConn.conn != nil {
if !bufConn.enabled {
cp.markUpstreamInvalid(bufConn.conn.RemoteAddr().String())
}
cp.connCounterDel(bufConn.conn.RemoteAddr().String())
bufConn.conn.Close()
bufConn.conn = nil
bufConn.enabled = false
}
if conn, err := cp.newConn(); err == nil {
bufConn.conn = conn
bufConn.wr.Reset(conn)
bufConn.ctime = time.Now()
bufConn.enabled = true
return nil
} else {
bufConn.enabled = false
return err
}
}
// putBufConn puts the BufConn back into the pool
func (cp *connPool) putBufConn(bufConn *LancerBufConn) {
if !bufConn.enabled {
cp.putInvalidBufConn(bufConn)
return
}
if bufConn.ctime.Add(cp.c.IdleTimeout).Before(time.Now()) {
bufConn.wr.Flush()
cp.putInvalidBufConn(bufConn)
return
}
cp.putValidBufConn(bufConn)
}
// putValidBufConn puts the bufConn into the valid pool
func (cp *connPool) putValidBufConn(bufConn *LancerBufConn) {
select {
case cp.validBufConnChan <- bufConn:
return
default:
log.Warn("BufConnChan full, discardthis shouldn't happen")
return
}
}
// putInvalidBufConn puts the bufConn into the invalid pool
func (cp *connPool) putInvalidBufConn(bufConn *LancerBufConn) {
select {
case cp.invalidBufConnChan <- bufConn:
return
default:
log.Warn("invalidBufConnChan full, discardthis shouldn't happen")
return
}
}
// maintainBufConnPool rebinds invalid BufConns to new connections
func (cp *connPool) maintainBufConnPool() {
for {
bufConn := <-cp.invalidBufConnChan
cp.setConn(bufConn)
cp.putBufConn(bufConn)
time.Sleep(time.Second * 1)
}
}
// markUpstreamInvalid marks the upstream server addr as invalid
func (cp *connPool) markUpstreamInvalid(addr string) (err error) {
log.Error("mark upstream %s invalid", addr)
cp.invalidUpstreamsLock.Lock()
cp.invalidUpstreams[addr] = nil
cp.invalidUpstreamsLock.Unlock()
return
}
// markUpstreamValid marks addr as valid again
func (cp *connPool) markUpstreamValid(addr string) (err error) {
log.Info("%s is valid again", addr)
cp.invalidUpstreamsLock.Lock()
delete(cp.invalidUpstreams, addr)
cp.invalidUpstreamsLock.Unlock()
return
}
// connCounterAdd increments the connection count of addr
func (cp *connPool) connCounterAdd(addr string) {
cp.connCounterLock.Lock()
defer cp.connCounterLock.Unlock()
if _, ok := cp.connCounter[addr]; ok {
cp.connCounter[addr] += 1
} else {
cp.connCounter[addr] = 1
}
return
}
// connCounterDel decrements the connection count of addr
func (cp *connPool) connCounterDel(addr string) {
cp.connCounterLock.Lock()
defer cp.connCounterLock.Unlock()
if _, ok := cp.connCounter[addr]; ok {
cp.connCounter[addr] -= 1
}
}
// connPoolStatus reports the conn pool status for expvar
func (cp *connPool) connPoolStatus() interface{} {
status := make(map[string]interface{})
status["conn_num"] = cp.connCounter
status["invalidUpstreams"] = cp.invalidUpstreams
return status
}
// maintainUpstream periodically probes invalid upstreams and marks the reachable ones valid again
func (cp *connPool) maintainUpstream() {
for {
cp.invalidUpstreamsLock.RLock()
tryAddrs := make([]string, 0, len(cp.invalidUpstreams))
for k := range cp.invalidUpstreams {
tryAddrs = append(tryAddrs, k)
}
cp.invalidUpstreamsLock.RUnlock()
for _, addr := range tryAddrs {
if conn, err := net.DialTimeout("tcp", addr, cp.c.DialTimeout); err == nil && conn != nil {
conn.Close()
cp.markUpstreamValid(addr)
}
}
time.Sleep(time.Second * 10)
}
}
// ReleaseConnPool flushes and closes all connections in the pool
func (cp *connPool) ReleaseConnPool() {
log.Info("Release Conn Pool")
close(cp.validBufConnChan)
close(cp.invalidBufConnChan)
for bufConn := range cp.validBufConnChan {
bufConn.enabled = false
// a disabled bufConn may have no underlying connection
if bufConn.conn != nil {
bufConn.wr.Flush()
bufConn.conn.Close()
}
}
}
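For completeness, a sketch of how this pool is meant to be driven; it would have to live inside package lancergrpc since everything here is unexported, and the upstream addresses are hypothetical:

// exampleConnPoolUsage is an in-package sketch, not part of this commit.
func exampleConnPoolUsage() {
    cfg := &ConnPoolConfig{
        Name:     "lancer-ops-log",
        AddrList: []string{"10.0.0.1:5658", "10.0.0.2:5658"}, // hypothetical upstreams
    }
    // validates the config, pre-creates PoolSize buffered conns and
    // starts the flush and maintenance goroutines
    cp, err := initConnPool(cfg)
    if err != nil {
        panic(err)
    }
    bufConn, _ := cp.getBufConn() // blocks until an enabled conn is available
    if _, err := bufConn.write([]byte("payload")); err != nil {
        bufConn.enabled = false // hand it back for repair via putBufConn
    }
    cp.putBufConn(bufConn)
    cp.ReleaseConnPool()
}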