Create & Init Project...

2019-04-22 18:49:16 +08:00
commit fc4fa37393
25440 changed files with 4054998 additions and 0 deletions

View File (Bazel BUILD file for the lancerlogstream package)

@@ -0,0 +1,45 @@
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
go_library(
name = "go_default_library",
srcs = [
"aggr.go",
"config.go",
"lancer.go",
"pool.go",
],
importpath = "go-common/app/service/ops/log-agent/output/lancerlogstream",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/service/ops/log-agent/event:go_default_library",
"//app/service/ops/log-agent/output:go_default_library",
"//app/service/ops/log-agent/output/cache/file:go_default_library",
"//app/service/ops/log-agent/pkg/bufio:go_default_library",
"//app/service/ops/log-agent/pkg/common:go_default_library",
"//app/service/ops/log-agent/pkg/flowmonitor:go_default_library",
"//app/service/ops/log-agent/pkg/lancermonitor:go_default_library",
"//library/log:go_default_library",
"//library/time:go_default_library",
"//vendor/github.com/BurntSushi/toml:go_default_library",
],
)
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
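
With these rules in place, the library target can be built directly by its label (a usage sketch, assuming the repository root is the Bazel workspace):

bazel build //app/service/ops/log-agent/output/lancerlogstream:go_default_library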

View File: app/service/ops/log-agent/output/lancerlogstream/aggr.go

@@ -0,0 +1,63 @@
package lancerlogstream
import (
"bytes"
"time"
"go-common/app/service/ops/log-agent/event"
)
const (
_logSeparator = byte('\u0001')
_logLancerHeaderLen = 19
)
// logAggr aggregates multiple logs into one buffered record per logId
func (l *Lancer) logAggr(e *event.ProcessorEvent) (err error) {
	l.logAggrBufLock.Lock()
	logAggrBuf := l.getlogAggrBuf(e.LogId)
	logAggrBuf.Write(e.Bytes())
	logAggrBuf.WriteByte(_logSeparator)
	aggrLen := logAggrBuf.Len()
	l.logAggrBufLock.Unlock()
	// recycle the event on every path, including the flush path
	event.PutEvent(e)
	if aggrLen > l.c.AggrSize {
		return l.flushLogAggr(e.LogId)
	}
	return nil
}
// getlogAggrBuf returns the aggregation buffer for logId, creating it on
// first use; callers must hold logAggrBufLock
func (l *Lancer) getlogAggrBuf(logId string) *bytes.Buffer {
	if _, ok := l.logAggrBuf[logId]; !ok {
		l.logAggrBuf[logId] = new(bytes.Buffer)
	}
	return l.logAggrBuf[logId]
}
// flushLogAggr moves the aggregated logs for logId onto the send channel
func (l *Lancer) flushLogAggr(logId string) (err error) {
l.logAggrBufLock.Lock()
defer l.logAggrBufLock.Unlock()
buf := l.getlogAggrBuf(logId)
if buf.Len() > 0 {
logDoc := new(logDoc)
logDoc.b = make([]byte, buf.Len())
copy(logDoc.b, buf.Bytes())
logDoc.logId = logId
l.sendChan <- logDoc
}
buf.Reset()
return nil
}
// flushLogAggrPeriodically runs flushLogAggr periodically
func (l *Lancer) flushLogAggrPeriodically() {
	tick := time.NewTicker(5 * time.Second)
	defer tick.Stop()
	for range tick.C {
		// snapshot the keys under the lock, then flush outside it
		l.logAggrBufLock.Lock()
		logIds := make([]string, 0, len(l.logAggrBuf))
		for logId := range l.logAggrBuf {
			logIds = append(logIds, logId)
		}
		l.logAggrBufLock.Unlock()
		for _, logId := range logIds {
			l.flushLogAggr(logId)
		}
	}
}
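
Since records are delimited only by the \u0001 separator that logAggr appends, a downstream consumer can recover them by splitting on that byte. A minimal standalone sketch (not part of this commit):

package main

import (
	"bytes"
	"fmt"
)

func main() {
	// aggregate two records the way logAggr does: each record is
	// followed by the \u0001 separator
	var buf bytes.Buffer
	for _, rec := range []string{`{"level":"info"}`, `{"level":"warn"}`} {
		buf.WriteString(rec)
		buf.WriteByte(0x01)
	}
	// a consumer splits on the separator; the trailing element is empty
	for _, rec := range bytes.Split(buf.Bytes(), []byte{0x01}) {
		if len(rec) > 0 {
			fmt.Printf("%s\n", rec)
		}
	}
}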

View File: app/service/ops/log-agent/output/lancerlogstream/config.go

@@ -0,0 +1,53 @@
package lancerlogstream
import (
"errors"
"go-common/app/service/ops/log-agent/output/cache/file"
"github.com/BurntSushi/toml"
)
type Config struct {
	Local           bool            `toml:"local"`
	Name            string          `toml:"name"`
	AggrSize        int             `toml:"aggrSize"`
	SendConcurrency int             `toml:"sendConcurrency"`
	CacheConfig     *file.Config    `toml:"cacheConfig"`
	PoolConfig      *ConnPoolConfig `toml:"poolConfig"`
}
func (c *Config) ConfigValidate() error {
	if c == nil {
		return errors.New("config of Lancer output is nil")
	}
	if c.Name == "" {
		return errors.New("output Name can't be empty")
	}
if c.AggrSize == 0 {
c.AggrSize = 819200
}
if c.SendConcurrency == 0 {
c.SendConcurrency = 5
}
if err := c.CacheConfig.ConfigValidate(); err != nil {
return err
}
if err := c.PoolConfig.ConfigValidate(); err != nil {
return err
}
return nil
}
func DecodeConfig(md toml.MetaData, primValue toml.Primitive) (c interface{}, err error) {
c = new(Config)
if err = md.PrimitiveDecode(primValue, c); err != nil {
return nil, err
}
return c, nil
}
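
For orientation, a TOML block that decodes into this Config could look like the following (an illustrative sketch: the table name, addresses, and values are assumptions; field names follow the toml tags above, and the cacheConfig section is omitted because its fields are defined in output/cache/file):

[lancer]
name = "lancer-ops-log"
aggrSize = 819200
sendConcurrency = 5
[lancer.poolConfig]
addrList = ["172.16.0.1:5680", "172.16.0.2:5680"]
dialTimeout = "5s"
idleTimeout = "15m"
bufSize = 2097152
poolSize = 2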

View File: app/service/ops/log-agent/output/lancerlogstream/lancer.go

@@ -0,0 +1,228 @@
package lancerlogstream
import (
"context"
"fmt"
"bytes"
"sync"
"encoding/binary"
"strconv"
"time"
"go-common/app/service/ops/log-agent/event"
"go-common/app/service/ops/log-agent/output"
"go-common/app/service/ops/log-agent/pkg/flowmonitor"
"go-common/app/service/ops/log-agent/pkg/common"
"go-common/app/service/ops/log-agent/output/cache/file"
"go-common/library/log"
"go-common/app/service/ops/log-agent/pkg/lancermonitor"
)
const (
_logLenStart = 2
_logLenEnd = 6
_tokenHeaderFormat = "logId=%s&timestamp=%s&version=1.1"
_protocolLen = 6
_appIdKey = `"app_id":`
_levelKey = `"level":`
_logTime = `"time":`
)
var (
logMagic = []byte{0xAC, 0xBE}
logMagicBuf = []byte{0xAC, 0xBE}
_logType = []byte{0, 1}
_logLength = []byte{0, 0, 0, 0}
local, _ = time.LoadLocation("Local")
)
type logDoc struct {
b []byte
logId string
}
func init() {
err := output.Register("lancer", NewLancer)
if err != nil {
panic(err)
}
}
type Lancer struct {
c *Config
next chan string
i chan *event.ProcessorEvent
cache *file.FileCache
logAggrBuf map[string]*bytes.Buffer
logAggrBufLock sync.Mutex
sendChan chan *logDoc
connPool *connPool
ctx context.Context
cancel context.CancelFunc
}
func NewLancer(ctx context.Context, config interface{}) (output.Output, error) {
	var err error
	lancer := new(Lancer)
	c, ok := config.(*Config)
	if !ok {
		return nil, fmt.Errorf("invalid config for Lancer output")
	}
	if err = c.ConfigValidate(); err != nil {
		return nil, err
	}
	lancer.c = c
if output.OutputRunning(lancer.c.Name) {
return nil, fmt.Errorf("Output %s already running", lancer.c.Name)
}
lancer.i = make(chan *event.ProcessorEvent)
lancer.next = make(chan string, 1)
lancer.logAggrBuf = make(map[string]*bytes.Buffer)
lancer.sendChan = make(chan *logDoc)
cache, err := file.NewFileCache(lancer.c.CacheConfig)
if err != nil {
return nil, err
}
lancer.cache = cache
lancer.c.PoolConfig.Name = lancer.c.Name
lancer.connPool, err = initConnPool(lancer.c.PoolConfig)
if err != nil {
return nil, err
}
lancer.ctx, lancer.cancel = context.WithCancel(ctx)
return lancer, nil
}
func (l *Lancer) InputChan() chan *event.ProcessorEvent {
	return l.i
}
func (l *Lancer) Run() (err error) {
go l.writeToCache()
go l.readFromCache()
go l.flushLogAggrPeriodically()
for i := 0; i < l.c.SendConcurrency; i++ {
go l.sendToLancer()
}
output.RegisterOutput(l.c.Name, l)
return nil
}
func (l *Lancer) Stop() {
l.cancel()
}
// writeToCache writes logs from the input channel to the cache
func (l *Lancer) writeToCache() {
for e := range l.i {
if e.Length < _logLancerHeaderLen {
event.PutEvent(e)
continue
}
l.cache.WriteToCache(e)
}
}
func (l *Lancer) readFromCache() {
for {
e := l.cache.ReadFromCache()
if e.Length < _logLancerHeaderLen {
event.PutEvent(e)
continue
}
// monitor should be called before event recycle
l.parseOpslog(e)
flowmonitor.Fm.AddEvent(e, "log-agent.output.lancer", "OK", "write to lancer")
lancermonitor.IncreaseLogCount("agent.send.success.count", e.LogId)
if l.c.Name == "lancer-ops-log" {
l.logAggr(e)
} else {
l.sendLogDirectToLancer(e)
}
}
}
func (l *Lancer) parseOpslog(e *event.ProcessorEvent) {
	if l.c.Name == "lancer-ops-log" && e.Length > _logLancerHeaderLen {
		logBody := e.Body[_logLancerHeaderLen:e.Length]
		e.AppId, _ = common.SeekValue([]byte(_appIdKey), logBody)
		if timeValue, err := common.SeekValue([]byte(_logTime), logBody); err == nil {
			if len(timeValue) >= 19 {
				// parse time: RFC3339Nano first, then a zone-less local
				// layout, then the first 19 bytes as local time
				var t time.Time
				if t, err = time.Parse(time.RFC3339Nano, string(timeValue)); err != nil {
					if t, err = time.ParseInLocation("2006-01-02T15:04:05", string(timeValue), local); err != nil {
						t, _ = time.ParseInLocation("2006-01-02T15:04:05", string(timeValue[0:19]), local)
					}
				}
				if !t.IsZero() {
					e.TimeRangeKey = strconv.FormatInt(t.Unix()/100*100, 10)
				}
			}
		}
	}
}
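// Note: the fallback chain above accepts, for example, all of the
// following time values (illustrative samples, not from the source):
//   2019-04-22T18:49:16.123+08:00  parsed as RFC3339Nano
//   2019-04-22T18:49:16            parsed as a zone-less local time
//   2019-04-22T18:49:16.123456     first 19 bytes parsed as local time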
// sendLogDirectToLancer sends a log directly to lancer without aggregation
func (l *Lancer) sendLogDirectToLancer(e *event.ProcessorEvent) {
logDoc := new(logDoc)
logDoc.b = make([]byte, e.Length)
copy(logDoc.b, e.Bytes())
logDoc.logId = e.LogId
event.PutEvent(e)
l.sendChan <- logDoc
}
// sendToLancer frames logDocs from sendChan and sends them to lancer
func (l *Lancer) sendToLancer() {
logSend := new(bytes.Buffer)
tokenHeaderLen := []byte{0, 0}
for {
select {
case logDoc := <-l.sendChan:
var err error
if len(logDoc.b) == 0 {
continue
}
// header
logSend.Reset()
logSend.Write(logMagicBuf)
logSend.Write(_logLength) // placeholder
logSend.Write(_logType)
// token header
tokenheader := []byte(fmt.Sprintf(_tokenHeaderFormat, logDoc.logId, strconv.FormatInt(time.Now().Unix()/100*100, 10)))
binary.BigEndian.PutUint16(tokenHeaderLen, uint16(len(tokenheader)))
logSend.Write(tokenHeaderLen)
logSend.Write(tokenheader)
// log body
logSend.Write(logDoc.b)
// set log length
bs := logSend.Bytes()
binary.BigEndian.PutUint32(bs[_logLenStart:_logLenEnd], uint32(logSend.Len()-_protocolLen))
// write
connBuf, err := l.connPool.getBufConn()
if err != nil {
flowmonitor.Fm.Add("log-agent", "log-agent.output.lancer", "", "ERROR", "get conn failed")
log.Error("get conn error: %v", err)
continue
}
if _, err = connBuf.write(bs); err != nil {
log.Error("wr.Write(log) error(%v)", err)
connBuf.enabled = false
l.connPool.putBufConn(connBuf)
flowmonitor.Fm.Add("log-agent", "log-agent.output.lancer", "", "ERROR", "write to lancer failed")
continue
}
l.connPool.putBufConn(connBuf)
// TODO: flowmonitor for specific appId
}
}
}
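
Taken together, the constants and writes above imply this frame layout on the wire (reconstructed from sendToLancer; integers are big-endian):

bytes 0-1   magic         0xAC 0xBE
bytes 2-5   length        uint32, frame length minus the 6-byte protocol header
bytes 6-7   log type      0x00 0x01
bytes 8-9   token length  uint16, byte length of the token header
bytes 10-   token header  logId=<logId>&timestamp=<ts>&version=1.1
then        log body      the logDoc bytes (\u0001-separated records for lancer-ops-log)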

View File: app/service/ops/log-agent/output/lancerlogstream/pool.go

@@ -0,0 +1,361 @@
package lancerlogstream
import (
"net"
"time"
"sync"
"errors"
"math/rand"
"expvar"
"go-common/library/log"
"go-common/app/service/ops/log-agent/pkg/bufio"
xtime "go-common/library/time"
)
var (
ErrAddrListNil = errors.New("addrList can't be nil")
ErrPoolSize = errors.New("pool size should be no greater than the length of the addr list")
)
type LancerBufConn struct {
conn net.Conn
wr *bufio.Writer
enabled bool
ctime time.Time
}
type connPool struct {
c *ConnPoolConfig
invalidUpstreams map[string]interface{}
invalidUpstreamsLock sync.RWMutex
validBufConnChan chan *LancerBufConn
invalidBufConnChan chan *LancerBufConn
connCounter map[string]int
connCounterLock sync.RWMutex
newConnLock sync.Mutex
}
type ConnPoolConfig struct {
	Name        string         `toml:"name"`
	AddrList    []string       `toml:"addrList"`
	DialTimeout xtime.Duration `toml:"dialTimeout"`
	IdleTimeout xtime.Duration `toml:"idleTimeout"`
	BufSize     int            `toml:"bufSize"`
	PoolSize    int            `toml:"poolSize"`
}
func (c *ConnPoolConfig) ConfigValidate() error {
if c == nil {
return errors.New("Config of pool is nil")
}
if len(c.AddrList) == 0 {
return errors.New("pool addr list can't be empty")
}
if time.Duration(c.DialTimeout) == 0 {
c.DialTimeout = xtime.Duration(time.Second * 5)
}
if time.Duration(c.IdleTimeout) == 0 {
c.IdleTimeout = xtime.Duration(time.Minute * 15)
}
if c.BufSize == 0 {
c.BufSize = 1024 * 1024 * 2 // 2M by default
}
if c.PoolSize == 0 {
c.PoolSize = len(c.AddrList)
}
return nil
}
// newConn makes a connection to a lancer upstream
func (cp *connPool) newConn() (conn net.Conn, err error) {
cp.newConnLock.Lock()
defer cp.newConnLock.Unlock()
for {
if addr, err := cp.randomOneUpstream(); err == nil {
if conn, err := net.DialTimeout("tcp", addr, time.Duration(cp.c.DialTimeout)); err == nil && conn != nil {
log.Info("connect to %s success", addr)
cp.connCounterAdd(addr)
return conn, nil
} else {
cp.markUpstreamInvalid(addr)
continue
}
} else {
return nil, err
}
}
}
// newBufConn creates a buffered conn; the bufConn is always bound to a
// conn, whether or not that connection is currently usable
func (cp *connPool) newBufConn() (bufConn *LancerBufConn, err error) {
	bufConn = new(LancerBufConn)
	bufConn.wr = bufio.NewWriterSize(nil, cp.c.BufSize)
	bufConn.enabled = cp.setConn(bufConn) == nil
	return bufConn, nil
}
// flushBufConn flushes the buffered conns periodically
func (cp *connPool) flushBufConn() {
for {
bufConn, _ := cp.getBufConn()
bufConn.conn.SetWriteDeadline(time.Now().Add(time.Second * 5))
if err := bufConn.wr.Flush(); err != nil {
log.Error("Error when flush to %s: %s", bufConn.conn.RemoteAddr().String(), err)
bufConn.enabled = false
}
cp.putBufConn(bufConn)
time.Sleep(time.Second * 5)
}
}
// initConnPool initializes the conn pool
func initConnPool(c *ConnPoolConfig) (cp *connPool, err error) {
if err = c.ConfigValidate(); err != nil {
return nil, err
}
if len(c.AddrList) == 0 {
return nil, ErrAddrListNil
}
if c.PoolSize > len(c.AddrList) {
return nil, ErrPoolSize
}
rand.Seed(time.Now().Unix())
cp = new(connPool)
cp.c = c
cp.validBufConnChan = make(chan *LancerBufConn, cp.c.PoolSize)
cp.invalidBufConnChan = make(chan *LancerBufConn, cp.c.PoolSize)
cp.invalidUpstreams = make(map[string]interface{})
cp.connCounter = make(map[string]int)
cp.initPool()
go cp.maintainUpstream()
go cp.flushBufConn()
go cp.maintainBufConnPool()
expvar.Publish("conn_pool"+cp.c.Name, expvar.Func(cp.connPoolStatus))
return cp, nil
}
// connableUpstreams returns the upstreams that are valid and currently
// have no live connection
func (cp *connPool) connableUpstreams() []string {
	list := make([]string, 0)
	cp.invalidUpstreamsLock.RLock()
	defer cp.invalidUpstreamsLock.RUnlock()
	cp.connCounterLock.RLock()
	defer cp.connCounterLock.RUnlock()
	for _, addr := range cp.c.AddrList {
		if _, ok := cp.invalidUpstreams[addr]; !ok {
			if count, ok := cp.connCounter[addr]; ok && count == 0 {
				list = append(list, addr)
			}
		}
	}
	return list
}
// write writes p to the buffered conn, refreshing the write deadline
func (bc *LancerBufConn) write(p []byte) (int, error) {
bc.conn.SetWriteDeadline(time.Now().Add(time.Second * 5))
return bc.wr.Write(p)
}
// randomOneUpstream returns one random connectable upstream
func (cp *connPool) randomOneUpstream() (s string, err error) {
list := cp.connableUpstreams()
if len(list) == 0 {
err = errors.New("No valid upstreams")
return
}
return list[rand.Intn(len(list))], nil
}
// initPool creates poolSize bufConns up front
func (cp *connPool) initPool() {
for _, addr := range cp.c.AddrList {
cp.connCounter[addr] = 0
}
for i := 0; i < cp.c.PoolSize; i++ {
if bufConn, err := cp.newBufConn(); err == nil {
cp.putBufConn(bufConn)
}
}
}
// novalidUpstream reports whether there is no valid upstream left
func (cp *connPool) novalidUpstream() bool {
return len(cp.invalidUpstreams) == len(cp.c.AddrList)
}
// getBufConn takes a BufConn from the pool, skipping disabled ones
func (cp *connPool) getBufConn() (*LancerBufConn, error) {
for {
select {
case bufConn := <-cp.validBufConnChan:
if !bufConn.enabled {
cp.putInvalidBufConn(bufConn)
continue
}
return bufConn, nil
case <-time.After(10 * time.Second):
log.Warn("timeout when get conn from conn pool")
continue
}
}
}
// setConn binds a fresh conn to the bufConn, closing and unregistering
// any old one first
func (cp *connPool) setConn(bufConn *LancerBufConn) error {
	if bufConn.conn != nil {
		if !bufConn.enabled {
			cp.markUpstreamInvalid(bufConn.conn.RemoteAddr().String())
		}
		cp.connCounterDel(bufConn.conn.RemoteAddr().String())
		bufConn.conn.Close()
		bufConn.conn = nil
		bufConn.enabled = false
	}
	conn, err := cp.newConn()
	if err != nil {
		bufConn.enabled = false
		return err
	}
	bufConn.conn = conn
	bufConn.wr.Reset(conn)
	bufConn.ctime = time.Now()
	bufConn.enabled = true
	return nil
}
// putBufConn puts a BufConn back into the pool
func (cp *connPool) putBufConn(bufConn *LancerBufConn) {
	if !bufConn.enabled {
		cp.putInvalidBufConn(bufConn)
		return
	}
	// recycle connections that have outlived IdleTimeout
	if bufConn.ctime.Add(time.Duration(cp.c.IdleTimeout)).Before(time.Now()) {
		bufConn.wr.Flush()
		cp.putInvalidBufConn(bufConn)
		return
	}
	cp.putValidBufConn(bufConn)
}
// putValidBufConn puts the bufConn into the valid pool
func (cp *connPool) putValidBufConn(bufConn *LancerBufConn) {
	select {
	case cp.validBufConnChan <- bufConn:
		return
	default:
		log.Warn("validBufConnChan full, discard conn; this shouldn't happen")
		return
	}
}
// putInvalidBufConn puts the bufConn into the invalid pool
func (cp *connPool) putInvalidBufConn(bufConn *LancerBufConn) {
	select {
	case cp.invalidBufConnChan <- bufConn:
		return
	default:
		log.Warn("invalidBufConnChan full, discard conn; this shouldn't happen")
		return
	}
}
// maintainBufConnPool rebinds invalid bufConns and returns them to the pool
func (cp *connPool) maintainBufConnPool() {
	for {
		bufConn := <-cp.invalidBufConnChan
		cp.setConn(bufConn)
		cp.putBufConn(bufConn)
		time.Sleep(time.Second * 1)
	}
}
// markUpstreamInvalid marks the corresponding upstream server as unavailable
func (cp *connPool) markUpstreamInvalid(addr string) (err error) {
log.Error("mark upstream %s invalid", addr)
cp.invalidUpstreamsLock.Lock()
cp.invalidUpstreams[addr] = nil
cp.invalidUpstreamsLock.Unlock()
return
}
// markUpstreamValid marks an addr as available again
func (cp *connPool) markUpstreamValid(addr string) (err error) {
log.Info("%s is valid again", addr)
cp.invalidUpstreamsLock.Lock()
delete(cp.invalidUpstreams, addr)
cp.invalidUpstreamsLock.Unlock()
return
}
// connCounterAdd increments the connection count of addr
func (cp *connPool) connCounterAdd(addr string) {
cp.connCounterLock.Lock()
defer cp.connCounterLock.Unlock()
if _, ok := cp.connCounter[addr]; ok {
cp.connCounter[addr] += 1
} else {
cp.connCounter[addr] = 1
}
return
}
// connCounterDel decrements the connection count of addr
func (cp *connPool) connCounterDel(addr string) {
cp.connCounterLock.Lock()
defer cp.connCounterLock.Unlock()
if _, ok := cp.connCounter[addr]; ok {
cp.connCounter[addr] -= 1
}
}
// connPoolStatus returns the connPool status for expvar
func (cp *connPool) connPoolStatus() interface{} {
status := make(map[string]interface{})
status["conn_num"] = cp.connCounter
status["invalidUpstreams"] = cp.invalidUpstreams
return status
}
// maintainUpstream probes invalid upstreams and marks them valid once reachable again
func (cp *connPool) maintainUpstream() {
for {
cp.invalidUpstreamsLock.RLock()
tryAddrs := make([]string, 0, len(cp.invalidUpstreams))
for k := range cp.invalidUpstreams {
tryAddrs = append(tryAddrs, k)
}
cp.invalidUpstreamsLock.RUnlock()
for _, addr := range tryAddrs {
if conn, err := net.DialTimeout("tcp", addr, time.Duration(cp.c.DialTimeout)); err == nil && conn != nil {
conn.Close()
cp.markUpstreamValid(addr)
}
}
time.Sleep(time.Second * 10)
}
}
// ReleaseConnPool flushes and closes all connections in the pool
func (cp *connPool) ReleaseConnPool() {
log.Info("Release Conn Pool")
close(cp.validBufConnChan)
close(cp.invalidBufConnChan)
for conn := range cp.validBufConnChan {
conn.enabled = false
conn.wr.Flush()
conn.conn.Close()
}
}
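
For orientation, this is the calling pattern the pool expects, mirroring how sendToLancer in lancer.go drives it (sendFrame is a hypothetical helper within this package, not part of this commit; error handling trimmed):

// sendFrame is a hypothetical illustration of the pool lifecycle
func sendFrame(cp *connPool, frame []byte) error {
	// blocks until an enabled conn is available
	bufConn, err := cp.getBufConn()
	if err != nil {
		return err
	}
	if _, err = bufConn.write(frame); err != nil {
		// mark the conn broken; maintainBufConnPool will rebind it
		bufConn.enabled = false
	}
	// always hand the conn back, valid or not
	cp.putBufConn(bufConn)
	return err
}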