Create & Init Project...

This commit is contained in:
2019-04-22 18:49:16 +08:00
commit fc4fa37393
25440 changed files with 4054998 additions and 0 deletions

View File

@@ -0,0 +1,46 @@
# Auto-managed Bazel build file for the file-input package.
package(default_visibility = ["//visibility:public"])

load(
    "@io_bazel_rules_go//go:def.bzl",
    "go_library",
)

# Go library containing the file input (config, glob scanning, harvester,
# line reader, registrar and state tracking).
go_library(
    name = "go_default_library",
    srcs = [
        "config.go",
        "file.go",
        "glob.go",
        "harvester.go",
        "multiline.go",
        "reader.go",
        "registrar.go",
        "state.go",
        "states.go",
    ],
    importpath = "go-common/app/service/ops/log-agent/input/file",
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
    deps = [
        "//app/service/ops/log-agent/event:go_default_library",
        "//app/service/ops/log-agent/input:go_default_library",
        "//app/service/ops/log-agent/pkg/lancerroute:go_default_library",
        "//library/log:go_default_library",
        "//library/time:go_default_library",
        "//vendor/github.com/BurntSushi/toml:go_default_library",
    ],
)

# Sources of this package only (private to the package).
filegroup(
    name = "package-srcs",
    srcs = glob(["**"]),
    tags = ["automanaged"],
    visibility = ["//visibility:private"],
)

# Recursive source aggregation used by the repo-wide srcs target.
filegroup(
    name = "all-srcs",
    srcs = [":package-srcs"],
    tags = ["automanaged"],
    visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,103 @@
package file
import (
"errors"
"time"
xtime "go-common/library/time"
"github.com/BurntSushi/toml"
)
// Config holds the TOML-decoded settings of one file input instance.
// Defaults for unset fields are filled in by ConfigValidate.
type Config struct {
	Paths      []string `toml:"paths"`    // glob patterns of the log files to collect
	Symlinks   bool     `toml:"symlinks"` // always forced to false in ConfigValidate
	AppId      string   `toml:"appId"`    // required application identifier
	LogId      string   `toml:"logId"`    // defaults to "000161" when empty
	ConfigPath string   `toml:"-"`        // path of the config file; filled from context at runtime
	MetaPath   string   `toml:"-"`        // directory where registry metadata is stored
	ID         string   `toml:"-"`        // derived from sha1(ConfigPath) when empty
	ReadFrom   string   `toml:"readFrom"` // "newest" (default) or "oldest"
	MaxLength  int      `toml:"maxLength"` // max bytes of one event; capped at 1024*10*64
	IgnoreOlder xtime.Duration `toml:"ignoreOlder"` // skip files not modified within this window
	CleanFilesOlder xtime.Duration `toml:"cleanFilesOlder"` // delete files older than this (0 disables)
	ScanFrequency   xtime.Duration `toml:"scanFrequency"`   // interval between directory scans
	CleanInactive   xtime.Duration `toml:"cleanInactive"`   // TTL for registry states
	HarvesterTTL    xtime.Duration `toml:"harvesterTTL"`    // harvester will stop itself if inactive longer than HarvesterTTL
	Multiline *MultilineConf `toml:"multiline"` // optional multiline assembly settings
	Timeout   xtime.Duration `toml:"timeout"`   // per-read timeout used by the harvester
	Fields    map[string]interface{} `toml:"fields"` // extra fields attached to every event
}
// ConfigValidate validates the file-input configuration and fills in
// defaults for unset fields. It mutates the receiver and returns an error
// when a required field is missing or a constraint is violated.
func (c *Config) ConfigValidate() error {
	if c == nil {
		return errors.New("config of file Input is nil")
	}
	if len(c.Paths) == 0 {
		return errors.New("paths of file Input can't be nil")
	}
	// Default logId when not configured.
	if c.LogId == "" {
		c.LogId = "000161"
	}
	if c.AppId == "" {
		return errors.New("appId of file Input can't be nil")
	}
	if c.IgnoreOlder == 0 {
		c.IgnoreOlder = xtime.Duration(time.Hour * 24)
	}
	if c.ScanFrequency == 0 {
		c.ScanFrequency = xtime.Duration(time.Second * 10)
	}
	// Note: CleanInactive should be greater than ignore_older + scan_frequency,
	// otherwise a state could be cleaned while its file is still eligible for pickup.
	if c.CleanInactive == 0 {
		c.CleanInactive = xtime.Duration(time.Hour * 24 * 7)
	}
	if c.CleanInactive < c.IgnoreOlder+c.ScanFrequency {
		return errors.New("CleanInactive must be greater than ScanFrequency + IgnoreOlder")
	}
	if c.HarvesterTTL == 0 {
		c.HarvesterTTL = xtime.Duration(time.Hour * 1)
	}
	if c.Timeout == 0 {
		c.Timeout = xtime.Duration(time.Second * 5)
	}
	if c.ReadFrom != "" && c.ReadFrom != "newest" && c.ReadFrom != "oldest" {
		return errors.New("ReadFrom of file input can only be newest or oldest")
	}
	if c.ReadFrom == "" {
		c.ReadFrom = "newest"
	}
	// Cap MaxLength at 640KiB (also the default when unset).
	if c.MaxLength == 0 || c.MaxLength > 1024*10*64 {
		c.MaxLength = 1024 * 10 * 64
	}
	// Symlinks is always disabled
	c.Symlinks = false
	if c.Multiline != nil {
		if err := c.Multiline.ConfigValidate(); err != nil {
			return err
		}
	}
	return nil
}
// DecodeConfig decodes the TOML primitive for this input into a *Config.
// It performs no validation; callers run ConfigValidate afterwards.
func DecodeConfig(md toml.MetaData, primValue toml.Primitive) (c interface{}, err error) {
	c = new(Config)
	if err = md.PrimitiveDecode(primValue, c); err != nil {
		return nil, err
	}
	return c, nil
}

View File

@@ -0,0 +1,213 @@
package file
import (
"context"
"fmt"
"time"
"path"
"crypto/sha1"
"encoding/base64"
"errors"
"os"
"go-common/app/service/ops/log-agent/event"
"go-common/app/service/ops/log-agent/input"
"go-common/library/log"
)
// File is the file input: it scans the configured globs periodically and
// spawns a harvester per matched file.
type File struct {
	c        *Config                      // validated input configuration
	output   chan<- *event.ProcessorEvent // downstream event channel
	ctx      context.Context              // lifetime of this input
	cancel   context.CancelFunc           // stops the input and its harvesters
	register *Registrar                   // persists per-file read offsets
}

// init registers this input type under the name "file" with the input factory.
func init() {
	err := input.Register("file", NewFile)
	if err != nil {
		panic(err)
	}
}
// NewFile creates a file input from a decoded *Config. It validates the
// config, derives ConfigPath/MetaPath/ID from the context when unset, and
// initializes the registrar. The returned input is not running yet; call Run.
func NewFile(ctx context.Context, config interface{}, output chan<- *event.ProcessorEvent) (input.Input, error) {
	f := new(File)
	if c, ok := config.(*Config); !ok {
		return nil, fmt.Errorf("Error config for File Input")
	} else {
		if err := c.ConfigValidate(); err != nil {
			return nil, err
		}
		f.c = c
	}
	f.output = output
	f.ctx, f.cancel = context.WithCancel(ctx)
	// set config by ctx: ConfigPath is required to derive a stable input ID.
	if f.c.ConfigPath == "" {
		configPath := ctx.Value("configPath")
		if configPath == nil {
			return nil, errors.New("can't get configPath from context")
		}
		f.c.ConfigPath = configPath.(string)
	}
	// ID = base64(sha1(ConfigPath)); used as the registry file name.
	if f.c.ID == "" {
		hasher := sha1.New()
		hasher.Write([]byte(f.c.ConfigPath))
		f.c.ID = base64.URLEncoding.EncodeToString(hasher.Sum(nil))
	}
	// NOTE(review): this type-asserts without a nil check; a missing
	// "MetaPath" context value would panic — presumably guaranteed by the
	// caller; confirm.
	if f.c.MetaPath == "" {
		f.c.MetaPath = ctx.Value("MetaPath").(string)
	}
	// init register
	if err := f.initRegister(); err != nil {
		return nil, err
	}
	return f, nil
}
// Run performs one synchronous scan, then launches a goroutine that
// re-scans every ScanFrequency until the input's context is cancelled.
// It always returns nil.
func (f *File) Run() (err error) {
	log.Info("start collect log configured in %s", f.c.ConfigPath)
	f.scan()
	go func() {
		// time.NewTicker instead of time.Tick: Tick's ticker can never be
		// stopped, so it would leak after the input is cancelled.
		ticker := time.NewTicker(time.Duration(f.c.ScanFrequency))
		defer ticker.Stop()
		for {
			select {
			case <-f.ctx.Done():
				return
			case <-ticker.C:
				f.scan()
			}
		}
	}()
	return nil
}
// Stop cancels the input's context, stopping the scan loop and all
// harvesters derived from it.
func (f *File) Stop() {
	f.cancel()
}

// Ctx returns the input's context (used by callers to observe shutdown).
func (f *File) Ctx() context.Context {
	return f.ctx
}
// initRegister creates the registrar backed by the registry file
// MetaPath/ID, loading any previously persisted states.
func (f *File) initRegister() error {
	path := path.Join(f.c.MetaPath, f.c.ID)
	register, err := NewRegistry(f.ctx, path)
	if err != nil {
		return err
	}
	f.register = register
	return nil
}
// scan resolves the configured globs, optionally removes expired files,
// and starts (or resumes) a harvester for every matched file that is not
// excluded by ignore_older.
func (f *File) scan() {
	paths := f.getFiles()
	// clean files older than CleanFilesOlder; 0 disables cleaning.
	if time.Duration(f.c.CleanFilesOlder) != 0 {
		f.cleanOldFiles(paths)
	}
	for path, info := range paths {
		// Abort promptly if the input was cancelled mid-scan.
		select {
		case <-f.ctx.Done():
			return
		default:
		}
		newState, err := getFileState(path, info)
		if err != nil {
			log.Error("Skipping file %s due to error %s", path, err)
			continue
		}
		// Load last state
		lastState := f.register.FindPrevious(newState)
		// Ignores all files which fall under ignore_older
		if f.isIgnoreOlder(newState) {
			continue
		}
		// Decides if previous state exists
		if lastState.IsEmpty() {
			log.Info("Start harvester for new file: %s, inode: %d", newState.Source, newState.Inode)
			// "firstRun" lets the harvester honor readFrom=newest only for
			// files seen for the first time.
			ctx := context.WithValue(f.ctx, "firstRun", true)
			err := f.startHarvester(ctx, f.c, f.register, newState, 0)
			if err != nil {
				log.Error("Harvester could not be started on new file: %s, Err: %s", newState.Source, err)
			}
		} else {
			ctx := context.WithValue(f.ctx, "firstRun", false)
			f.harvestExistingFile(ctx, f.c, f.register, newState, lastState)
		}
	}
}
// cleanOldFiles deletes matched files whose modification time is older than
// CleanFilesOlder. The most recently modified file is tracked in latestFile
// and is only deleted when an even newer file shows up — so the single
// newest file of a glob is never removed.
func (f *File) cleanOldFiles(paths map[string]os.FileInfo) {
	if time.Duration(f.c.CleanFilesOlder) == 0 {
		return
	}
	var latestFile *State
	for path, info := range paths {
		newState, err := getFileState(path, info)
		if err != nil {
			log.Error("Skipping file %s due to error %s", path, err)
			continue
		}
		// First candidate becomes the provisional latest file.
		if latestFile == nil {
			latestFile = &newState
			continue
		}
		if newState.Fileinfo.ModTime().After(latestFile.Fileinfo.ModTime()) {
			// delete latestFile if newer file existing and modtime of latestFile is older than f.c.CleanFilesOlder
			if time.Since(latestFile.Fileinfo.ModTime()) > time.Duration(f.c.CleanFilesOlder) {
				if err := os.Remove(latestFile.Source); err != nil {
					log.Error("Failed to delete file %s", latestFile.Source)
				} else {
					log.Info("Delete file %s older than %s", latestFile.Source, time.Duration(f.c.CleanFilesOlder).String())
				}
			}
			latestFile = &newState
			continue
		}
		if newState.Fileinfo.ModTime().Before(latestFile.Fileinfo.ModTime()) {
			// newState is older than the current latest: remove it when expired.
			if time.Since(newState.Fileinfo.ModTime()) > time.Duration(f.c.CleanFilesOlder) {
				if err := os.Remove(newState.Source); err != nil {
					log.Error("Failed to delete file %s", newState.Source)
				} else {
					log.Info("Delete file %s older than %s", newState.Source, time.Duration(f.c.CleanFilesOlder))
				}
			}
		}
	}
}
// isIgnoreOlder reports whether the file described by state has been
// inactive longer than the configured IgnoreOlder window and should
// therefore be skipped by the scanner.
func (f *File) isIgnoreOlder(state State) bool {
	// A zero IgnoreOlder disables the check entirely.
	if f.c.IgnoreOlder == 0 {
		return false
	}
	return time.Since(state.Fileinfo.ModTime()) > time.Duration(f.c.IgnoreOlder)
}

View File

@@ -0,0 +1,74 @@
package file
import (
"os"
"path/filepath"
"path"
"go-common/library/log"
)
// getFiles expands every configured glob and returns the matched regular
// files keyed by path, with their os.Stat info. Directories are skipped,
// and symlinks are skipped because Symlinks is forced off in the config.
// When the input runs against a container, the "MergedDir" context value
// prefixes each glob with the container's overlay2 merged root.
func (f *File) getFiles() map[string]os.FileInfo {
	paths := map[string]os.FileInfo{}
	for _, p := range f.c.Paths {
		// logs in docker overlay2
		if MergedDir := f.ctx.Value("MergedDir"); MergedDir != nil {
			p = path.Join(MergedDir.(string), p)
		}
		matches, err := filepath.Glob(p)
		if err != nil {
			log.Error("glob(%s) failed: %v", p, err)
			continue
		}
		// Check any matched files to see if we need to start a harvester
		for _, file := range matches {
			// exclude_files support is not implemented; kept for reference:
			//if f.isFileExcluded(file) {
			//	log.Info("input", "Exclude file: %s", file)
			//	continue
			//}
			// Fetch Lstat File info to detected also symlinks
			fileInfo, err := os.Lstat(file)
			if err != nil {
				log.Warn("lstat(%s) failed: %s", file, err)
				continue
			}
			if fileInfo.IsDir() {
				log.Warn("Skipping directory: %s", file)
				continue
			}
			isSymlink := fileInfo.Mode()&os.ModeSymlink > 0
			if isSymlink && !f.c.Symlinks {
				log.Warn("File %s skipped as it is a symlink.", file)
				continue
			}
			// Fetch Stat file info which fetches the inode. In case of a symlink, the original inode is fetched
			fileInfo, err = os.Stat(file)
			if err != nil {
				log.Warn("stat(%s) failed: %s", file, err)
				continue
			}
			// Symlink-origin dedup is disabled along with Symlinks support:
			//if p.config.Symlinks {
			//	for _, finfo := range paths {
			//		if os.SameFile(finfo, fileInfo) {
			//			log.Info("Same file found as symlink and originap. Skipping file: %s", file)
			//			continue OUTER
			//		}
			//	}
			//}
			paths[file] = fileInfo
		}
	}
	return paths
}

View File

@@ -0,0 +1,503 @@
package file
import (
"fmt"
"os"
"errors"
"io"
"time"
"context"
"bytes"
"strconv"
"regexp"
"go-common/app/service/ops/log-agent/event"
"go-common/library/log"
"go-common/app/service/ops/log-agent/pkg/lancerroute"
)
// Source abstracts the thing a harvester reads from (normally a file).
type Source interface {
	io.ReadCloser
	Name() string
	Stat() (os.FileInfo, error)
	Continuable() bool // can we continue processing after EOF?
	HasState() bool    // does this source have a state?
}

// Hfile adapts *os.File to the Source interface.
type Hfile struct {
	*os.File
}

// lineReadTimeout is the sentinel returned when a read exceeds the
// configured per-read Timeout.
var lineReadTimeout = errors.New("lineReadTimeout")

func (Hfile) Continuable() bool { return true }
func (Hfile) HasState() bool    { return true }

// Harvester contains all harvester related data: it tails a single file,
// assembles lines (optionally multiline events) and ships them downstream.
type Harvester struct {
	config          *Config
	source          *os.File           // open handle of the harvested file
	ctx             context.Context    // harvester lifetime
	cancel          context.CancelFunc
	state           State              // current offset/inode state
	register        *Registrar         // receives state updates
	reader          Reader             // line reader over source
	output          chan<- *event.ProcessorEvent
	active          time.Time          // last time new data was observed
	lineBuffer      *bytes.Buffer      // scratch buffer for one line
	multilineBuffer *bytes.Buffer      // scratch buffer for one multiline event
	firstLine       []byte             // pending first line of the next multiline event
	readFunc        func() ([]byte, error) // readOneLine or readMultiLine
}
// startHarvester starts a new harvester for state at the given offset:
// it creates and sets up the harvester, persists the not-finished state,
// wires the line/multiline read function, and launches the three harvester
// goroutines (state flusher, reader loop, inactivity watchdog).
func (f *File) startHarvester(ctx context.Context, c *Config, register *Registrar, state State, offset int64) (err error) {
	// Set state to "not" finished to indicate that a harvester is running
	state.Finished = false
	state.Offset = offset
	// Create harvester with state
	h, err := NewHarvester(c, register, state, f.output)
	if err != nil {
		return err
	}
	h.ctx, h.cancel = context.WithCancel(ctx)
	err = h.Setup()
	if err != nil {
		return fmt.Errorf("error setting up harvester: %s", err)
	}
	// Update state before staring harvester
	// This makes sure the states is set to Finished: false
	// This is synchronous state update as part of the scan
	h.register.SendStateUpdate(h.state)
	h.active = time.Now()
	// Pre-size both scratch buffers to the maximum event length.
	h.lineBuffer = bytes.NewBuffer(make([]byte, 0, h.config.MaxLength))
	h.multilineBuffer = bytes.NewBuffer(make([]byte, 0, h.config.MaxLength))
	if h.config.Multiline != nil {
		h.readFunc = h.readMultiLine
	} else {
		h.readFunc = h.readOneLine
	}
	go h.stateUpdatePeriodically()
	go h.Run()
	go h.activeCheck()
	return err
}
// harvestExistingFile continues harvesting a file with a known state if
// needed. Depending on how the new stat compares to the stored state it
// resumes at the old offset, restarts a truncated file from zero, records a
// rename, or does nothing when a harvester is already running.
func (f *File) harvestExistingFile(ctx context.Context, c *Config, register *Registrar, newState State, oldState State) {
	// No harvester is running for the file, start a new harvester
	// It is important here that only the size is checked and not modification time, as modification time could be incorrect on windows
	// https://blogs.technet.microsoft.com/asiasupp/2010/12/14/file-date-modified-property-are-not-updating-while-modifying-a-file-without-closing-it/
	if oldState.Finished && newState.Fileinfo.Size() > oldState.Offset {
		// Resume harvesting of an old file we've stopped harvesting from
		// This could also be an issue with force_close_older that a new harvester is started after each scan but not needed?
		// One problem with comparing modTime is that it is in seconds, and scans can happen more then once a second
		log.Info("Resuming harvesting of file: %s, offset: %d, new size: %d", newState.Source, oldState.Offset, newState.Fileinfo.Size())
		err := f.startHarvester(ctx, c, register, newState, oldState.Offset)
		if err != nil {
			log.Error("Harvester could not be started on existing file: %s, Err: %s", newState.Source, err)
		}
		return
	}
	// File size was reduced -> truncated file
	if newState.Fileinfo.Size() < oldState.Offset {
		log.Info("Old file was truncated. Starting from the beginning: %s, old size: %d new size: %d", newState.Source, oldState.Offset, newState.Fileinfo.Size())
		if oldState.Finished {
			err := f.startHarvester(ctx, c, register, newState, 0)
			if err != nil {
				log.Error("Harvester could not be started on truncated file: %s, Err: %s", newState.Source, err)
			}
			return
		}
		// just stop old harvester; the next scan will restart it from offset 0.
		h := f.register.GetHarvester(oldState.Inode)
		if h != nil {
			h.Stop()
		}
		return
	}
	// Check if file was renamed
	if oldState.Source != "" && oldState.Source != newState.Source {
		// This does not start a new harvester as it is assume that the older harvester is still running
		// or no new lines were detected. It sends only an event status update to make sure the new name is persisted.
		log.Info("File rename was detected: %s -> %s, Current offset: %v", oldState.Source, newState.Source, oldState.Offset)
		oldState.Source = newState.Source
		f.register.SendStateUpdate(oldState)
	}
	if !oldState.Finished {
		// Nothing to do. Harvester is still running and file was not renamed
		log.V(1).Info("Harvester for file is still running: %s, inode %d", newState.Source, newState.Inode)
	} else {
		log.V(1).Info("File didn't change: %s, inode %d", newState.Source, newState.Inode)
	}
}
// NewHarvester builds a harvester for the given state, wiring in the shared
// config, registrar and output channel. The harvester is inert until
// startHarvester attaches a context, runs Setup, and launches it.
func NewHarvester(c *Config, register *Registrar, state State, output chan<- *event.ProcessorEvent) (*Harvester, error) {
	h := new(Harvester)
	h.config = c
	h.state = state
	h.register = register
	h.output = output
	// Propagate CleanInactive as the state TTL so the registrar can expire
	// the entry once the file is gone long enough.
	if c.CleanInactive > 0 {
		h.state.TTL = time.Duration(c.CleanInactive)
	}
	return h, nil
}
// Setup opens the file handler and creates the reader for the harvester.
// On reader-creation failure the already opened file is closed again so no
// descriptor leaks.
func (h *Harvester) Setup() error {
	err := h.openFile()
	if err != nil {
		return fmt.Errorf("Harvester setup failed. Unexpected file opening error: %s", err)
	}
	h.reader, err = h.newLogFileReader()
	if err != nil {
		if h.source != nil {
			h.source.Close()
		}
		return fmt.Errorf("Harvester setup failed. Unexpected encoding line reader error: %s", err)
	}
	return nil
}
// openFile opens a file and checks for the encoding. In case the encoding cannot be detected
// or the file cannot be opened because for example of failing read permissions, an error
// is returned and the harvester is closed. The file will be picked up again the next time
// the file system is scanned.
func (h *Harvester) openFile() error {
	f, err := os.OpenFile(h.state.Source, os.O_RDONLY, os.FileMode(0))
	if err != nil {
		return fmt.Errorf("Failed opening %s: %s", h.state.Source, err)
	}
	// Makes sure file handler is also closed on errors
	err = h.validateFile(f)
	if err != nil {
		f.Close()
		return err
	}
	h.source = f
	return nil
}
// validateFile checks that the opened handle is a regular file, that it is
// still the same file the scanner stat'ed (same inode/device), and seeks it
// to the initial read offset, recording that offset in the state.
func (h *Harvester) validateFile(f *os.File) error {
	info, err := f.Stat()
	if err != nil {
		return fmt.Errorf("Failed getting stats for file %s: %s", h.state.Source, err)
	}
	if !info.Mode().IsRegular() {
		return fmt.Errorf("Tried to open non regular file: %q %s", info.Mode(), info.Name())
	}
	// Compares the stat of the opened file to the state given by the input. Abort if not match.
	if !os.SameFile(h.state.Fileinfo, info) {
		return errors.New("file info is not identical with opened file. Aborting harvesting and retrying file later again")
	}
	// get file offset. Only update offset if no error
	offset, err := h.initFileOffset(f)
	if err != nil {
		return err
	}
	log.V(1).Info("harvester Setting offset for file: %s inode %d. Offset: %d ", h.state.Source, h.state.Inode, offset)
	h.state.Offset = offset
	return nil
}
// initFileOffset positions the file handle for reading and returns the
// resulting offset: the persisted offset when resuming, the end of the file
// on the very first run with readFrom=newest, otherwise the current
// position (the beginning for a freshly opened file).
func (h *Harvester) initFileOffset(file *os.File) (int64, error) {
	// continue from last known offset
	if h.state.Offset > 0 {
		// io.Seek* constants replace the deprecated os.SEEK_* ones.
		return file.Seek(h.state.Offset, io.SeekStart)
	}
	var firstRun = false
	if v := h.ctx.Value("firstRun"); v != nil {
		firstRun = v.(bool)
	}
	// Skip historical content only when the file is seen for the first time.
	if h.config.ReadFrom == "newest" && firstRun {
		return file.Seek(0, io.SeekEnd)
	}
	return file.Seek(0, io.SeekCurrent)
}
// newLogFileReader builds the line reader over the opened source file,
// bounded by the configured maximum event length.
func (h *Harvester) newLogFileReader() (Reader, error) {
	return NewLineReader(h.source, h.config.MaxLength)
}
// WriteToProcessor wraps one assembled message into a ProcessorEvent,
// stamps it with app/log identity, configured extra fields, source path,
// destination and time bucket, and sends it downstream. The send blocks
// when the output channel is full.
func (h *Harvester) WriteToProcessor(message []byte) {
	e := event.GetEvent()
	e.Write(message)
	e.AppId = []byte(h.config.AppId)
	e.LogId = h.config.LogId
	e.Source = "file"
	// update fields (assumes e.Fields is a non-nil map — provided by event.GetEvent)
	for k, v := range h.config.Fields {
		e.Fields[k] = v
	}
	e.Fields["file"] = h.state.Source
	e.Destination = lancerroute.GetLancerByLogid(e.LogId)
	// time maybe overwrite by processor
	e.Time = time.Now()
	// bucket the event into a 100-second time range key
	e.TimeRangeKey = strconv.FormatInt(e.Time.Unix()/100*100, 10)
	h.output <- e
}
// stateUpdatePeriodically flushes the harvester's state to the registrar
// every 5s when the offset advanced, and once more on shutdown so the final
// offset is persisted.
func (h *Harvester) stateUpdatePeriodically() {
	// time.NewTicker instead of time.Tick: Tick's ticker can never be
	// stopped and would leak after the harvester exits.
	ticker := time.NewTicker(time.Second * 5)
	defer ticker.Stop()
	var offset int64
	for {
		select {
		case <-ticker.C:
			// Only flush when new data was read; also refresh the activity
			// timestamp used by activeCheck.
			if h.state.Offset > offset {
				offset = h.state.Offset
				h.register.SendStateUpdate(h.state)
				h.active = time.Now()
			}
		case <-h.ctx.Done():
			h.register.SendStateUpdate(h.state)
			return
		}
	}
}
// Stop marks the state finished, persists it, and cancels the harvester's
// goroutines. Safe to call from the watchdog or the scanner.
func (h *Harvester) Stop() {
	h.state.Finished = true
	h.register.SendStateUpdate(h.state)
	h.cancel()
	// "Exited" (the original message misspelled it as "Existed").
	log.Info("Harvester for File: %s Inode %d Exited", h.state.Source, h.state.Inode)
}
// activeCheck is the inactivity watchdog: once per minute it stops the
// harvester when no data has been read for longer than HarvesterTTL.
func (h *Harvester) activeCheck() {
	// time.NewTicker instead of time.Tick so the ticker is released when
	// the harvester exits.
	ticker := time.NewTicker(time.Minute * 1)
	defer ticker.Stop()
	for {
		select {
		case <-ticker.C:
			// time.Since is the idiomatic form of time.Now().Sub.
			if time.Since(h.active) > time.Duration(h.config.HarvesterTTL) {
				log.Info("Harvester for file: %s, inode: %d is inactive longer than HarvesterTTL, Ended", h.state.Source, h.state.Inode)
				h.Stop()
				return
			}
		case <-h.ctx.Done():
			return
		}
	}
}
// readMultiLine assembles one multiline event: a line matching the
// configured pattern starts an event, following non-matching lines are
// appended (newline-separated) until the next start line, MaxLines,
// MaxLength, or the per-call timeout. The pending start line is carried
// over between calls in h.firstLine.
func (h *Harvester) readMultiLine() (b []byte, err error) {
	h.multilineBuffer.Reset()
	counter := 0
	if h.firstLine != nil {
		// Continue the event whose start line was read by the previous call.
		h.multilineBuffer.Write(h.firstLine)
	}
	// Keep the CancelFunc and release the timeout's resources on return;
	// the original discarded it (go vet's lostcancel).
	ctx, cancel := context.WithTimeout(h.ctx, time.Duration(h.config.Timeout))
	defer cancel()
	// Compile the pattern once per call instead of once per line —
	// regexp.Match recompiles on every invocation. The pattern was already
	// validated by MultilineConf.ConfigValidate; should compilation fail
	// anyway, a nil re makes every line non-matching, which is exactly what
	// the per-line regexp.Match produced for a bad pattern.
	re, reErr := regexp.Compile(h.config.Multiline.Pattern)
	if reErr != nil {
		re = nil
	}
	for {
		select {
		case <-ctx.Done():
			h.firstLine = nil
			return h.multilineBuffer.Bytes(), lineReadTimeout
		default:
		}
		message, err := h.readOneLine()
		if err != nil && err != io.EOF && err != lineReadTimeout {
			h.firstLine = nil
			// NOTE(review): with an empty buffer the read error is swallowed
			// and nil returned — presumably deliberate; confirm against
			// Run's error handling.
			if h.multilineBuffer.Len() == 0 {
				return message, nil
			}
			if len(message) > 0 {
				h.multilineBuffer.Write([]byte{'\n'})
				h.multilineBuffer.Write(message)
			}
			return h.multilineBuffer.Bytes(), err
		}
		if len(message) == 0 {
			continue
		}
		matched := re != nil && re.Match(message)
		if matched {
			// A new start line terminates the buffered event.
			if h.firstLine != nil {
				h.firstLine = message
				return h.multilineBuffer.Bytes(), nil
			}
			// First line of a fresh event: buffer it and keep reading.
			h.firstLine = message
			h.multilineBuffer.Write(message)
			continue
		}
		// Continuation line with no event in progress: emit it on its own.
		if h.firstLine == nil {
			return message, nil
		}
		h.multilineBuffer.Write([]byte{'\n'})
		h.multilineBuffer.Write(message)
		counter++
		// Size/line limits flush the event even without a terminating start line.
		if counter > h.config.Multiline.MaxLines || h.multilineBuffer.Len() > h.config.MaxLength {
			h.firstLine = nil
			return h.multilineBuffer.Bytes(), nil
		}
	}
}
// readOneLine returns the next complete line from the reader, polling on
// EOF until new data arrives, the line is complete, or the per-call timeout
// fires (in which case lineReadTimeout is returned with whatever was
// buffered). The state offset is advanced by the raw bytes consumed.
func (h *Harvester) readOneLine() (b []byte, err error) {
	h.lineBuffer.Reset()
	// Keep the CancelFunc and release the timeout's resources on return;
	// the original discarded it (go vet's lostcancel).
	ctx, cancel := context.WithTimeout(h.ctx, time.Duration(h.config.Timeout))
	defer cancel()
	for {
		select {
		case <-ctx.Done():
			return h.lineBuffer.Bytes(), lineReadTimeout
		default:
		}
		message, advance, err := h.reader.Next()
		// advance counts raw bytes consumed, including the stripped newline.
		h.state.Offset += int64(advance)
		// Fast path: complete line and nothing buffered from earlier reads.
		if err == nil && h.lineBuffer.Len() == 0 {
			return message, nil
		}
		h.lineBuffer.Write(message)
		if err == nil {
			return h.lineBuffer.Bytes(), nil
		}
		if err == io.EOF && advance == 0 {
			// Nothing new yet; poll again until more data is appended.
			time.Sleep(time.Millisecond * 100)
			continue
		}
		// Partial line at EOF is only flushed once it reaches MaxLength;
		// shorter partials fall through and are returned with io.EOF below.
		if err == io.EOF && advance > 0 && h.lineBuffer.Len() >= h.config.MaxLength {
			return h.lineBuffer.Bytes(), nil
		}
		if err != nil {
			return h.lineBuffer.Bytes(), err
		}
	}
}
//// Run start the harvester and reads files line by line and sends events to the defined output
//func (h *Harvester) Run() {
// log.V(1).Info("Harvester started for file: %s, inode %d", h.state.Source, h.state.Inode)
// h.register.RegisterHarvester(h)
// defer h.register.UnRegisterHarvester(h)
//
// var line = make([]byte, 0, h.config.MaxLength)
// for {
// select {
// case <-h.ctx.Done():
// return
// default:
// }
// //TODO MaxLength check
// message, advance, err := h.reader.Next()
// // update offset
// h.state.Offset += int64(advance)
//
// if err == nil {
// if len(line) == 0 {
// h.WriteToProcessor(message)
// continue
// }
// line = append(line, message...)
// h.WriteToProcessor(line)
// line = line[:0]
// continue
// }
//
// if err == io.EOF && advance == 0 {
// time.Sleep(time.Millisecond * 100)
// continue
// }
//
// if err == io.EOF && advance > 0 {
// line = append(line, message...)
// if len(line) >= h.config.MaxLength {
// h.WriteToProcessor(line)
// line = line[:0]
// }
// continue
// }
//
// if err != nil {
// log.Error("Harvester Read line error: %v; File: %v", err, h.state.Source)
// h.Stop()
// return
// }
// }
//}
// Run start the harvester and reads files line by line and sends events to the defined output.
// It registers itself with the registrar for its lifetime, loops on the
// configured read function (single- or multiline), and stops on any read
// error other than timeout or EOF.
func (h *Harvester) Run() {
	log.V(1).Info("Harvester started for file: %s, inode %d", h.state.Source, h.state.Inode)
	h.register.RegisterHarvester(h)
	defer h.register.UnRegisterHarvester(h)
	for {
		select {
		case <-h.ctx.Done():
			return
		default:
		}
		message, err := h.readFunc()
		// Timeouts are expected on idle files; log at debug level only.
		if err == lineReadTimeout {
			log.V(1).Info("lineReadTimeout when harvesting %s", h.state.Source)
		}
		if len(message) > 0 {
			h.WriteToProcessor(message)
		}
		// Any other error (besides EOF) is fatal for this harvester.
		if err != nil && err != lineReadTimeout && err != io.EOF {
			log.Error("Harvester Read line error: %v; File: %v", err, h.state.Source)
			h.Stop()
			return
		}
	}
}

View File

@@ -0,0 +1,31 @@
package file
import (
"errors"
"regexp"
"fmt"
)
// MultilineConf configures multiline event assembly: a line matching
// Pattern starts a new event and subsequent non-matching lines are appended
// to it, up to MaxLines lines.
type MultilineConf struct {
	Pattern  string `toml:"pattern"`
	MaxLines int    `toml:"maxLines"`
}

// ConfigValidate checks the multiline settings and fills in defaults:
// the pattern must be present and compile, and MaxLines defaults to 200
// when unset. It mutates the receiver.
func (c *MultilineConf) ConfigValidate() error {
	switch {
	case c == nil:
		return errors.New("config of Multiline is nil")
	case c.Pattern == "":
		return errors.New("Pattern in Multiline can't be nil")
	}
	if _, err := regexp.Compile(c.Pattern); err != nil {
		return fmt.Errorf("Multiline pattern compile error: %s", err)
	}
	if c.MaxLines == 0 {
		c.MaxLines = 200
	}
	return nil
}

View File

@@ -0,0 +1,80 @@
package file
import (
"io"
"bufio"
"bytes"
)
// Message represents a reader event with timestamp, content and actual number
// of bytes read from input before decoding.
//type Message struct {
// Ts time.Time // timestamp the content was read
// Content []byte // actual content read
// Bytes int // total number of bytes read to generate the message
// //Fields common.MapStr // optional fields that can be added by reader
//}
type Reader interface {
Next() ([]byte, int, error)
}
type LineReader struct {
reader io.Reader
rb *bufio.Reader
bufferSize int
nl []byte
nlSize int
scan *bufio.Scanner
}
// New creates a new reader object
func NewLineReader(input io.Reader, bufferSize int) (*LineReader, error) {
nl := []byte{'\n'}
r := &LineReader{
reader: input,
bufferSize: bufferSize,
nl: nl,
nlSize: len(nl),
}
r.rb = bufio.NewReaderSize(input, r.bufferSize)
r.scan = bufio.NewScanner(r.rb)
r.scan.Split(ScanLines)
return r, nil
}
func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}
if i := bytes.IndexByte(data, '\n'); i >= 0 {
// We have a full newline-terminated line.
return i + 1, data[0:i], nil
}
// Request more data.
return 0, nil, nil
}
// Next reads the next line until the new line character
func (r *LineReader) Next() ([]byte, int, error) {
body, err := r.rb.ReadBytes('\n')
advance := len(body)
//if err == io.EOF && advance > 0 {
// return body, advance, err
//}
// remove '\n'
if len(body) > 0 && body[len(body)-1] == '\n' {
body = body[0:len(body)-1]
}
// remove '\r'
if len(body) > 0 && body[len(body)-1] == '\r' {
body = body[0: len(body)-1]
}
return body, advance, err
}

View File

@@ -0,0 +1,288 @@
package file
import (
"sync"
"os"
"fmt"
"time"
"io"
"path/filepath"
"encoding/json"
"context"
"go-common/library/log"
)
// Registrar persists file read states to a JSON registry file and tracks
// the harvesters currently running, keyed by inode.
type Registrar struct {
	Channel              chan State // incoming state updates from harvesters
	registryFile         string     // Path to the Registry File
	wg                   sync.WaitGroup
	states               *States // Map with all file paths inside and the corresponding state
	bufferedStateUpdates int
	flushInterval        time.Duration        // how often the registry is written to disk
	harvesters           map[uint64]*Harvester // running harvesters by inode
	hLock                sync.RWMutex          // guards harvesters
	ctx                  context.Context
	cancel               context.CancelFunc
}
// NewRegistry creates a new Registrar instance, updating the registry file on
// `file.State` updates. It fails if the registry file can not be opened or
// created; on success the registrar's Run loop is already started.
func NewRegistry(ctx context.Context, registryFile string) (*Registrar, error) {
	r := &Registrar{
		registryFile:  registryFile,
		states:        NewStates(),
		Channel:       make(chan State, 100),
		wg:            sync.WaitGroup{},
		flushInterval: time.Second * 5,
		harvesters:    make(map[uint64]*Harvester),
	}
	r.ctx, r.cancel = context.WithCancel(ctx)
	// Init loads or creates the registry file before the loop starts.
	err := r.Init()
	if err != nil {
		return nil, err
	}
	go r.Run()
	return r, err
}
// RegisterHarvester records h under its inode; it errors when a harvester
// is already registered for that inode.
func (r *Registrar) RegisterHarvester(h *Harvester) error {
	r.hLock.Lock()
	defer r.hLock.Unlock()
	if _, ok := r.harvesters[h.state.Inode]; ok {
		// %d: Inode is a uint64 — the original %s verb rendered as
		// %!s(uint64=...) in the error message.
		return fmt.Errorf("harvestor of inode %d Re registered", h.state.Inode)
	}
	r.harvesters[h.state.Inode] = h
	return nil
}
// UnRegisterHarvester removes h's inode entry; it errors when no harvester
// is registered for that inode.
func (r *Registrar) UnRegisterHarvester(h *Harvester) error {
	r.hLock.Lock()
	defer r.hLock.Unlock()
	inode := h.state.Inode
	if _, ok := r.harvesters[inode]; !ok {
		return fmt.Errorf("harvestor of inode %d not found", inode)
	}
	delete(r.harvesters, inode)
	return nil
}
// GetHarvester returns the running harvester for inode i, or nil when none
// is registered.
func (r *Registrar) GetHarvester(i uint64) *Harvester {
	r.hLock.RLock()
	defer r.hLock.RUnlock()
	// A missing key yields the zero value nil, matching the explicit
	// lookup-and-return of the previous implementation.
	return r.harvesters[i]
}
// Init sets up the Registrar and make sure the registry file is setup
// correctly: the parent directory is created, a fresh empty registry is
// written when none exists, and an existing one is validated and loaded.
func (r *Registrar) Init() (err error) {
	// Create directory if it does not already exist.
	registryPath := filepath.Dir(r.registryFile)
	err = os.MkdirAll(registryPath, 0750)
	if err != nil {
		return fmt.Errorf("Failed to created registry file dir %s: %v", registryPath, err)
	}
	// Check if files exists
	fileInfo, err := os.Lstat(r.registryFile)
	if os.IsNotExist(err) {
		log.Info("No registry file found under: %s. Creating a new registry file.", r.registryFile)
		// No registry exists yet, write empty state to check if registry can be written
		return r.writeRegistry()
	}
	if err != nil {
		return err
	}
	// Check if regular file, no dir, no symlink
	if !fileInfo.Mode().IsRegular() {
		// Special error message for directory
		if fileInfo.IsDir() {
			return fmt.Errorf("Registry file path must be a file. %s is a directory.", r.registryFile)
		}
		return fmt.Errorf("Registry file path is not a regular file: %s", r.registryFile)
	}
	log.Info("Registry file set to: %s", r.registryFile)
	// load states
	if err = r.loadStates(); err != nil {
		return err
	}
	return nil
}
// writeRegistry writes the new json registry file to disk: expired states
// are garbage-collected first, the state map is serialized to a temp file,
// and the temp file is atomically rotated into place.
func (r *Registrar) writeRegistry() error {
	// First clean up states
	r.gcStates()
	// TODO lock for reading r.states.states
	tempfile, err := writeTmpFile(r.registryFile, r.states.states)
	if err != nil {
		return err
	}
	err = SafeFileRotate(r.registryFile, tempfile)
	if err != nil {
		return err
	}
	log.V(1).Info("Registry file %s updated. %d states written.", r.registryFile, len(r.states.states))
	return nil
}
// SafeFileRotate safely rotates an existing file under path and replaces it with the tempfile
func SafeFileRotate(path, tempfile string) error {
parent := filepath.Dir(path)
if e := os.Rename(tempfile, path); e != nil {
return e
}
// best-effort fsync on parent directory. The fsync is required by some
// filesystems, so to update the parents directory metadata to actually
// contain the new file being rotated in.
f, err := os.Open(parent)
if err != nil {
return nil // ignore error
}
defer f.Close()
f.Sync()
return nil
}
// loadStates fetches the previous reading state from the configured
// registry file, marks every loaded state finished (no harvester is running
// yet), and installs the result as the registrar's state map.
func (r *Registrar) loadStates() error {
	f, err := os.Open(r.registryFile)
	if err != nil {
		return err
	}
	defer f.Close()
	log.Info("Loading registrar data from %s", r.registryFile)
	states, err := readStatesFrom(f)
	if err != nil {
		return err
	}
	states = r.preProcessStates(states)
	r.states.SetStates(states)
	// Fixed format: the original read "registrar%s : %+v" — missing space
	// and a %+v verb applied to the int count.
	log.V(1).Info("States loaded from registrar %s: %d", r.registryFile, len(states))
	return nil
}
// preProcessStates marks every loaded state as finished, since no harvester
// can be running for them right after startup, and returns the same map.
func (r *Registrar) preProcessStates(states map[uint64]State) map[uint64]State {
	for id, st := range states {
		st.Finished = true
		states[id] = st
	}
	return states
}
// readStatesFrom decodes a JSON-encoded inode→State map from in.
func readStatesFrom(in io.Reader) (map[uint64]State, error) {
	// Pre-allocate so a JSON "null" payload still yields a usable map.
	states := make(map[uint64]State)
	if err := json.NewDecoder(in).Decode(&states); err != nil {
		return nil, fmt.Errorf("Error decoding states: %s", err)
	}
	return states, nil
}
// writeTmpFile serializes states as JSON into baseName+".new", fsyncs it,
// and returns the temp file's path for the subsequent atomic rotation.
func writeTmpFile(baseName string, states map[uint64]State) (string, error) {
	tempfile := baseName + ".new"
	f, err := os.OpenFile(tempfile, os.O_RDWR|os.O_CREATE|os.O_TRUNC|os.O_SYNC, 0640)
	if err != nil {
		log.Error("Failed to create tempfile (%s) for writing: %s", tempfile, err)
		return "", err
	}
	defer f.Close()
	encoder := json.NewEncoder(f)
	if err := encoder.Encode(states); err != nil {
		log.Error("Error when encoding the states: %s", err)
		return "", err
	}
	// Commit the changes to storage to avoid corrupt registry files
	if err = f.Sync(); err != nil {
		log.Error("Error when syncing new registry file contents: %s", err)
		return "", err
	}
	return tempfile, nil
}
// gcStates runs a registry cleanup, dropping finished states whose TTL has
// expired, and logs how many entries were removed.
// NOTE(review): the commented-out gcRequired/gcEnabled gating from the
// original upstream implementation was never ported — cleanup currently
// runs on every registry write.
func (r *Registrar) gcStates() {
	//if !r.gcRequired {
	//	return
	//}
	beforeCount := len(r.states.states)
	cleanedStates := r.states.Cleanup()
	log.V(1).Info(
		"Registrar states %s cleaned up. Before: %d, After: %d", r.registryFile,
		beforeCount, beforeCount-cleanedStates)
}
// FindPrevious lookups a registered state, that matching the new state.
// Returns a zero-state if no match is found.
func (r *Registrar) FindPrevious(newState State) State {
	return r.states.FindPrevious(newState)
}
// Run is the registrar's event loop: it applies incoming state updates,
// flushes the registry file every flushInterval, and performs a final flush
// on shutdown.
func (r *Registrar) Run() {
	log.Info("Starting Registrar for: %s", r.registryFile)
	// time.NewTicker instead of time.Tick: Tick's ticker can never be
	// stopped, so it would leak after the registrar shuts down.
	flush := time.NewTicker(r.flushInterval)
	defer flush.Stop()
	for {
		select {
		case <-flush.C:
			r.flushRegistry()
		case state := <-r.Channel:
			r.processEventStates(state)
		case <-r.ctx.Done():
			r.flushRegistry()
			return
		}
	}
}
// flushRegistry writes the registry to disk, logging (but not propagating)
// any failure so the registrar loop keeps running.
func (r *Registrar) flushRegistry() {
	if err := r.writeRegistry(); err != nil {
		log.Error("Writing of registry returned error: %v. Continuing...", err)
	}
}

// processEventStates gets the states from the events and writes them to the registrar state
func (r *Registrar) processEventStates(state State) {
	r.states.UpdateWithTs(state, time.Now())
}

// SendStateUpdate queues a state update for the registrar loop. This blocks
// when the channel is full; the non-blocking variant below was tried and
// intentionally disabled.
func (r *Registrar) SendStateUpdate(state State) {
	r.Channel <- state
	//select {
	//case r.Channel <- state:
	//default:
	//	log.Warn("state update receiving chan full")
	//}
}

View File

@@ -0,0 +1,63 @@
package file
import (
"time"
"os"
"fmt"
"path/filepath"
"syscall"
)
// State is the persisted harvesting state of one file, keyed by inode.
type State struct {
	Source    string      `json:"source"`    // absolute file path (may change on rename)
	Offset    int64       `json:"offset"`    // bytes already consumed
	Inode     uint64      `json:"inode"`     // identity of the file
	Fileinfo  os.FileInfo `json:"-"`         // the file info (not serialized)
	Timestamp time.Time   `json:"timestamp"` // last update time, used for TTL expiry
	Finished  bool        `json:"finished"`  // true when no harvester is running
	Meta      map[string]string `json:"meta"`
	TTL       time.Duration     `json:"ttl"` // expiry for finished states; -1 = never
}
// NewState creates a new file state for path from its FileInfo.
// The inode is taken from the underlying syscall.Stat_t, so this is
// Unix-only; a non-Unix FileInfo.Sys() would make the type assertion panic.
func NewState(fileInfo os.FileInfo, path string) State {
	stat := fileInfo.Sys().(*syscall.Stat_t)
	return State{
		Fileinfo:  fileInfo,
		Inode:     stat.Ino,
		Source:    path,
		Finished:  false,
		Timestamp: time.Now(),
		TTL:       -1, // By default, state does have an infinite ttl
		Meta:      nil,
	}
}
// ID returns the state's unique key: the file's inode.
func (s *State) ID() uint64 {
	return s.Inode
}

// IsEqual compares the state to an other state supporting stringer based on the unique string
func (s *State) IsEqual(c *State) bool {
	return s.ID() == c.ID()
}

// IsEmpty returns true if the state is empty (the zero State, as returned
// by FindPrevious when no match exists).
func (s *State) IsEmpty() bool {
	return s.Inode == 0 &&
		s.Source == "" &&
		len(s.Meta) == 0 &&
		s.Timestamp.IsZero()
}
// getFileState builds a State for path using the given FileInfo, resolving
// path to its absolute form first.
func getFileState(path string, info os.FileInfo) (State, error) {
	absolutePath, err := filepath.Abs(path)
	if err != nil {
		return State{}, fmt.Errorf("could not fetch abs path for file %s: %s", absolutePath, err)
	}
	// Create new state for comparison against the registry.
	return NewState(info, absolutePath), nil
}

View File

@@ -0,0 +1,101 @@
package file
import (
"sync"
"time"
"go-common/library/log"
)
// States handles list of FileState. One must use NewStates to instantiate a
// file states registry. Using the zero-value is not safe.
type States struct {
	sync.RWMutex // guards states
	// states store, keyed by inode (State.ID)
	states map[uint64]State
}
// NewStates generates a new states registry with an initialized (empty) map.
func NewStates() *States {
	return &States{
		states: map[uint64]State{},
	}
}
// Update stores newState under its inode ID, creating the entry (and
// logging) when no previous state existed.
func (s *States) Update(newState State) {
	s.Lock()
	defer s.Unlock()
	id := newState.ID()
	if _, ok := s.states[id]; !ok {
		// %d: the ID is a uint64 inode — the original %s verb rendered as
		// %!s(uint64=...).
		log.V(1).Info("New state added for %d", id)
	}
	s.states[id] = newState
}
// Cleanup removes every finished state whose TTL has elapsed and returns
// the number of entries that were deleted.
func (s *States) Cleanup() int {
	s.Lock()
	defer s.Unlock()
	now := time.Now()
	before := len(s.states)
	for inode, st := range s.states {
		// Only finished states with a positive TTL ever expire.
		if st.Finished && st.TTL > 0 && now.Sub(st.Timestamp) > st.TTL {
			delete(s.states, inode)
		}
	}
	return before - len(s.states)
}
// GetState returns a copy of the state stored under id, or the zero State
// when no entry exists.
func (s *States) GetState(id uint64) State {
	s.RLock()
	defer s.RUnlock()
	// A missing key yields the zero State, so a single map access replaces
	// the original's check-then-read double lookup with identical behavior.
	return s.states[id]
}
// FindPrevious lookups a registered state, that matching the new state.
// Returns a zero-state if no match is found.
func (s *States) FindPrevious(newState State) State {
	s.RLock()
	defer s.RUnlock()
	// "prev" instead of "s": the original local shadowed the receiver,
	// which is legal but error-prone.
	if prev, ok := s.states[newState.ID()]; ok {
		return prev
	}
	return State{}
}
// SetStates overwrites all internal states with the given states array.
// The map is adopted as-is (not copied); callers must not mutate it afterwards.
func (s *States) SetStates(states map[uint64]State) {
	s.Lock()
	defer s.Unlock()
	s.states = states
}
// UpdateWithTs updates a state, assigning the given timestamp.
// If previous state didn't exist, new one is created.
// Note: FindPrevious takes the read lock before the write lock is acquired
// below, so the "new state" log is best-effort under concurrent updates.
func (s *States) UpdateWithTs(newState State, ts time.Time) {
	id := newState.ID()
	oldState := s.FindPrevious(newState)
	newState.Timestamp = ts
	s.Lock()
	defer s.Unlock()
	s.states[id] = newState
	if oldState.IsEmpty() {
		log.V(1).Info("New state added for %s", newState.Source)
	}
}