Create & Init Project...

This commit is contained in:
2019-04-22 18:49:16 +08:00
commit fc4fa37393
25440 changed files with 4054998 additions and 0 deletions

22
app/job/bbq/recall/BUILD Normal file
View File

@@ -0,0 +1,22 @@
# Source-bundling targets for the app/job/bbq/recall package (tagged "automanaged").
# package-srcs: every file matched by glob(["**"]) in this package; private, consumed by all-srcs below.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# all-srcs: this package's files plus the all-srcs group of each sub-package listed here.
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/job/bbq/recall/api/v1:all-srcs",
"//app/job/bbq/recall/cmd:all-srcs",
"//app/job/bbq/recall/internal/conf:all-srcs",
"//app/job/bbq/recall/internal/dao:all-srcs",
"//app/job/bbq/recall/internal/model:all-srcs",
"//app/job/bbq/recall/internal/service:all-srcs",
"//app/job/bbq/recall/proto:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,24 @@
### v1.0.7
1. 新发视频标签时效改为24小时内视频
### v1.0.6
1. 实时倒排取ctime为当天的视频
### v1.0.5
1. 增加新发视频实时倒排索引
2. 正排质量信息增加曝光数
### v1.0.4
1. 正排标签逻辑优化
### v1.0.3
1. 正排离线三级标签
### v1.0.2
1. 正排部署脚本修改
### v1.0.1
1. 离线数据字段修改
### v1.0.0
1. BBQ召回正排数据离线计算

View File

@@ -0,0 +1,6 @@
# Owner
daiwei
# Author
# Reviewer

10
app/job/bbq/recall/OWNERS Normal file
View File

@@ -0,0 +1,10 @@
# See the OWNERS docs at https://go.k8s.io/owners
approvers:
- daiwei
labels:
- bbq
- job
- job/bbq/recall
options:
no_parent_owners: true

View File

@@ -0,0 +1,12 @@
# recall-job
## 项目简介
1.
## 编译环境
## 依赖包
## 编译执行

View File

@@ -0,0 +1,48 @@
# Build targets for the go-common/app/job/bbq/recall/api/v1 package (tagged "automanaged").
load(
"@io_bazel_rules_go//proto:def.bzl",
"go_proto_library",
)
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Protobuf description built from api.proto.
proto_library(
name = "v1_proto",
srcs = ["api.proto"],
tags = ["automanaged"],
)
# Go code generated from v1_proto with the go_proto compiler.
go_proto_library(
name = "v1_go_proto",
compilers = ["@io_bazel_rules_go//proto:go_proto"],
importpath = "go-common/app/job/bbq/recall/api/v1",
proto = ":v1_proto",
tags = ["automanaged"],
)
# Go library combining generate.go with the generated proto code.
go_library(
name = "go_default_library",
srcs = ["generate.go"],
embed = [":v1_go_proto"],
importpath = "go-common/app/job/bbq/recall/api/v1",
tags = ["automanaged"],
visibility = ["//visibility:public"],
)
# Every file in this package; private, consumed by all-srcs below.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public handle on this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,16 @@
// Proto file that defines the project's API; it can describe both the gRPC and the HTTP API.
// protobuf references:
// - https://developers.google.com/protocol-buffers/
// - http://info.bilibili.co/display/documentation/gRPC+Proto
// Tool that generates HTTP code from protobuf:
// - http://git.bilibili.co/platform/go-common/tree/master/app/tool/protoc-gen-bm
syntax = "proto3";
// Name the package as {discovery_id}.{version}; version looks like v1, v2, v1beta ...
// NOTE: if you do not know the discovery_id, ask a maintainer; new projects must apply for one (first come, first served).
// e.g. account.service.v1
// package {discovery_id}.{version}
// NOTE: please delete these boilerplate comments once the file is filled in.
option go_package = "v1";

View File

@@ -0,0 +1,4 @@
package v1
// 生成 gRPC 代码
//go:generate $GOPATH/src/go-common/app/tool/warden/protoc.sh

View File

@@ -0,0 +1,42 @@
# Build targets for the go-common/app/job/bbq/recall/cmd package (tagged "automanaged").
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_binary",
"go_library",
)
# Executable built from go_default_library below.
go_binary(
name = "cmd",
embed = [":go_default_library"],
tags = ["automanaged"],
)
# main.go plus its dependencies; test.toml is shipped as a data file.
go_library(
name = "go_default_library",
srcs = ["main.go"],
data = ["test.toml"],
importpath = "go-common/app/job/bbq/recall/cmd",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/job/bbq/recall/internal/conf:go_default_library",
"//app/job/bbq/recall/internal/service:go_default_library",
"//library/log:go_default_library",
"//library/net/trace:go_default_library",
],
)
# Every file in this package; private, consumed by all-srcs below.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public handle on this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,61 @@
package main
import (
"flag"
"os"
"os/signal"
"syscall"
"time"
"go-common/app/job/bbq/recall/internal/conf"
"go-common/app/job/bbq/recall/internal/service"
"go-common/library/log"
"go-common/library/net/trace"
)
// _serviceName holds the value of the -service flag: the name of a single job
// to run once. When it is empty, main schedules the cron jobs instead.
var _serviceName string
// init registers the -service command-line flag, whose value is stored in
// _serviceName (empty by default).
func init() {
	const usage = "run service name"
	flag.StringVar(&_serviceName, "service", "", usage)
}
// main loads the configuration, initialises logging and tracing, builds the
// service, and then either runs the single job named by -service or starts the
// cron scheduler and blocks until a termination signal arrives.
func main() {
	flag.Parse()
	err := conf.Init()
	if err != nil {
		panic(err)
	}

	log.Init(conf.Conf.Log)
	defer log.Close()
	log.Info("recall-job start")

	trace.Init(conf.Conf.Tracer)
	defer trace.Close()

	srv := service.New(conf.Conf)
	defer srv.Close()

	// No job name given: run as a long-lived scheduler.
	if _serviceName == "" {
		srv.InitCron()
		deamon()
		return
	}
	// One-shot mode: run the named job and exit.
	srv.RunSrv(_serviceName)
}
// deamon blocks until the process receives a signal that should stop it.
// SIGHUP is logged and ignored; SIGQUIT, SIGTERM and SIGINT log an exit
// message, wait one second and return; any other delivered signal returns
// immediately.
func deamon() {
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGHUP, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGINT)
	for sig := range sigs {
		log.Info("get a signal %s", sig.String())
		if sig == syscall.SIGHUP {
			// Nothing to do on hang-up; keep waiting.
			continue
		}
		if sig == syscall.SIGQUIT || sig == syscall.SIGTERM || sig == syscall.SIGINT {
			log.Info("recall-job exit")
			time.Sleep(time.Second)
		}
		return
	}
}

View File

@@ -0,0 +1,83 @@
# Sample configuration for recall-job; presumably the test.toml shipped with the cmd package and passed via -conf (confirm).
# NOTE(review): this file contains plaintext database passwords and an API appkey/secret. Confirm they are
# test-only values; real credentials should not be committed.
# Logging settings.
[log]
stdout = true
# Main database connection settings (the Go config struct has a matching MySQL field).
[mysql]
addr = "172.16.38.91:3306"
dsn = "root:123456@tcp(172.16.38.91:3306)/bbq?allowNativePasswords=true&timeout=800ms&readTimeout=1200ms&writeTimeout=800ms&parseTime=true&loc=Local&charset=utf8,utf8mb4"
readDSN = ["root:123456@tcp(172.16.38.91:3306)/bbq?allowNativePasswords=true&timeout=800ms&readTimeout=1200ms&writeTimeout=800ms&parseTime=true&loc=Local&charset=utf8,utf8mb4"]
active = 20
idle = 10
idleTimeout ="4h"
queryTimeout = "800ms"
execTimeout = "800ms"
tranTimeout = "1000ms"
# Offline database connection settings (matching OfflineMySQL field in the Go config struct).
[offlineMysql]
addr = "172.16.38.91:3306"
dsn = "root:123456@tcp(172.16.38.91:3306)/bbq?allowNativePasswords=true&timeout=800ms&readTimeout=1200ms&writeTimeout=800ms&parseTime=true&loc=Local&charset=utf8,utf8mb4"
readDSN = ["root:123456@tcp(172.16.38.91:3306)/bbq?allowNativePasswords=true&timeout=800ms&readTimeout=1200ms&writeTimeout=800ms&parseTime=true&loc=Local&charset=utf8,utf8mb4"]
active = 20
idle = 10
idleTimeout ="4h"
queryTimeout = "800ms"
execTimeout = "800ms"
tranTimeout = "1000ms"
# CMS database connection settings (bbq_cms schema; matching CmsMySQL field in the Go config struct).
[cmsMysql]
addr = "172.16.38.91:3306"
dsn = "root:123456@tcp(172.16.38.91:3306)/bbq_cms?allowNativePasswords=true&timeout=800ms&readTimeout=1200ms&writeTimeout=800ms&parseTime=true&loc=Local&charset=utf8,utf8mb4"
readDSN = ["root:123456@tcp(172.16.38.91:3306)/bbq_cms?allowNativePasswords=true&timeout=800ms&readTimeout=1200ms&writeTimeout=800ms&parseTime=true&loc=Local&charset=utf8,utf8mb4"]
active = 20
idle = 10
idleTimeout ="4h"
queryTimeout = "800ms"
execTimeout = "800ms"
tranTimeout = "1000ms"
# General redis pool settings.
[redis]
name = "bbq-web"
proto = "tcp"
addr = "172.16.38.91:6379"
idle = 10
active = 10
dialTimeout = "1s"
readTimeout = "1s"
writeTimeout = "1s"
idleTimeout = "10s"
expire = "1m"
# Redis pool settings for bloom-filter storage (matching BfRedis field in the Go config struct).
[bfredis]
name = "bbq-bf"
proto = "tcp"
addr = "172.16.38.91:6379"
idle = 10
active = 10
dialTimeout = "1s"
readTimeout = "1s"
writeTimeout = "1s"
idleTimeout = "10s"
expire = "1m"
# Berserker data API: signing keys and query endpoints.
# The dao code indexes these lists by position (Keys[0], API[1]), so the order of entries matters.
[berserker]
[[berserker.keys]]
owner = "daiwei"
appkey = "66ed588a742c72408bc2d876afa8f7ca"
secret = "7d7a63e37fe6af52a58fb4755b62360e"
[[berserker.api]]
name = "video_quality"
url = "http://berserker.bilibili.co/avenger/api/175/query"
[[berserker.api]]
name = "video_view"
url = "http://berserker.bilibili.co/avenger/api/154/query"
# Per-job settings: job name (matched against the -service flag), cron schedule, and input/output paths.
[job]
[job.forwardIndex]
jobName = "genForwardIndex"
schedule = "@every 30m"
input = "/Users/daiwei/Desktop/"
output = "/Users/daiwei/forward_index.txt"
[job.bloomfilter]
jobName = "genBloomFilter"
schedule = "@every 30m"
input = ""
output = ""

View File

@@ -0,0 +1,43 @@
# Build targets for the go-common/app/job/bbq/recall/internal/conf package (tagged "automanaged").
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library for the configuration package (berserker.go, conf.go).
go_library(
name = "go_default_library",
srcs = [
"berserker.go",
"conf.go",
],
importpath = "go-common/app/job/bbq/recall/internal/conf",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//library/cache/redis:go_default_library",
"//library/conf:go_default_library",
"//library/database/sql:go_default_library",
"//library/ecode/tip:go_default_library",
"//library/log:go_default_library",
"//library/net/http/blademaster:go_default_library",
"//library/net/http/blademaster/middleware/verify:go_default_library",
"//library/net/rpc/warden:go_default_library",
"//library/net/trace:go_default_library",
"//vendor/github.com/BurntSushi/toml:go_default_library",
],
)
# Every file in this package; private, consumed by all-srcs below.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public handle on this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,20 @@
package conf
// BerserkerConfig groups the settings for the berserker data API: the signing
// credentials and the query endpoints. The dao code addresses both lists by
// position, so entry order is significant.
type BerserkerConfig struct {
	// Keys are the credentials used to sign requests.
	Keys []*BerserkerKey
	// API are the named query endpoints.
	API []*BerserkerAPI
}
// BerserkerAPI describes one berserker query endpoint: a label and the URL
// that requests are sent to.
type BerserkerAPI struct {
	Name, URL string
}
// BerserkerKey is one berserker credential. AppKey and Secret feed the request
// signature; Owner labels whom the key belongs to.
type BerserkerKey struct {
	Owner, AppKey, Secret string
}

View File

@@ -0,0 +1,113 @@
package conf
import (
"errors"
"flag"
"go-common/library/cache/redis"
"go-common/library/conf"
"go-common/library/database/sql"
ecode "go-common/library/ecode/tip"
"go-common/library/log"
bm "go-common/library/net/http/blademaster"
"go-common/library/net/http/blademaster/middleware/verify"
"go-common/library/net/rpc/warden"
"go-common/library/net/trace"
"github.com/BurntSushi/toml"
)
// confPath is the local configuration file path, set by the -conf flag.
var confPath string

// client is the config-center client created when no local path is given.
var client *conf.Client

// Conf is the process-wide configuration. It is filled in by Init and
// overwritten in place on every config-center reload.
var Conf = new(Config)
// Config is the root configuration of recall-job.
type Config struct {
	// Logging, HTTP server, request verification and tracing settings.
	Log    *log.Config
	BM     *bm.ServerConfig
	Verify *verify.Config
	Tracer *trace.Config
	// Redis is the general pool; BfRedis is the pool bloom filters are written to.
	Redis, BfRedis *redis.Config
	// MySQL serves video and tag queries, OfflineMySQL the per-video quality
	// stats, and CmsMySQL the text tags from video_repository.
	MySQL, OfflineMySQL, CmsMySQL *sql.Config
	Ecode                         *ecode.Config
	// Berserker holds the credentials and endpoints of the berserker data API.
	Berserker *BerserkerConfig
	// Job holds the per-job name, schedule and paths.
	Job *Jobs
	// GRPCClient maps a client name to its settings; the dao reads the
	// "recall" entry.
	GRPCClient map[string]*GRPCConfig
}
// Jobs holds the settings of the two scheduled jobs: forward-index generation
// and bloom-filter generation.
type Jobs struct {
	ForwardIndex, BloomFilter *JobConfig
}
// JobConfig describes one job: the name it is selected by in one-shot mode,
// its cron schedule, and its input and output locations.
type JobConfig struct {
	JobName, Schedule, Input, Output string
}
// GRPCConfig is the configuration of one outgoing gRPC client.
type GRPCConfig struct {
	// WardenConf is handed to warden.NewClient when the client is built.
	WardenConf *warden.ClientConfig
	// Addr is the target passed to Dial.
	Addr string
}
// init registers the -conf flag; its value (empty by default) is stored in
// confPath.
func init() {
	const usage = "default config path"
	flag.StringVar(&confPath, "conf", "", usage)
}
// Init loads the configuration into Conf. When -conf was not given it loads
// from the config center; otherwise it reads the local file at that path.
func Init() error {
	if confPath == "" {
		return remote()
	}
	return local()
}
// local decodes the TOML file at confPath into Conf and returns the decode
// error, if any.
func local() (err error) {
	if _, decodeErr := toml.DecodeFile(confPath, &Conf); decodeErr != nil {
		return decodeErr
	}
	return nil
}
// remote creates the config-center client, performs the initial load, and then
// starts a goroutine that reloads the configuration on every client event.
// It returns an error only when the client cannot be created or the initial
// load fails; reload failures are logged and the previous configuration stays
// in effect.
func remote() (err error) {
	if client, err = conf.New(); err != nil {
		return
	}
	if err = load(); err != nil {
		return
	}
	go func() {
		for range client.Event() {
			log.Info("config reload")
			// Use a goroutine-local variable: the enclosing function's err has
			// already been returned (as nil) and must not be read here.
			if reloadErr := load(); reloadErr != nil {
				log.Error("config reload error (%v)", reloadErr)
			}
		}
	}()
	return
}
func load() (err error) {
var (
s string
ok bool
tmpConf *Config
)
if s, ok = client.Toml2(); !ok {
return errors.New("load config center error")
}
if _, err = toml.Decode(s, &tmpConf); err != nil {
return errors.New("could not decode config")
}
*Conf = *tmpConf
return
}

View File

@@ -0,0 +1,49 @@
# Build targets for the go-common/app/job/bbq/recall/internal/dao package (tagged "automanaged").
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library for the data-access package.
go_library(
name = "go_default_library",
srcs = [
"berserker.go",
"bloomfilter.go",
"dao.go",
"hdfs.go",
"inverted_index.go",
"video.go",
],
importpath = "go-common/app/job/bbq/recall/internal/dao",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/job/bbq/recall/internal/conf:go_default_library",
"//app/job/bbq/recall/internal/model:go_default_library",
"//app/job/bbq/recall/proto:go_default_library",
"//app/job/bbq/recall/proto/quality:go_default_library",
"//app/service/bbq/recsys-recall/api/grpc/v1:go_default_library",
"//library/cache/redis:go_default_library",
"//library/database/sql:go_default_library",
"//library/log:go_default_library",
"//library/net/rpc/warden:go_default_library",
"//vendor/github.com/Dai0522/go-hash/bloomfilter:go_default_library",
"//vendor/github.com/golang/snappy:go_default_library",
"//vendor/github.com/json-iterator/go:go_default_library",
],
)
# Every file in this package; private, consumed by all-srcs below.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public handle on this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,14 @@
package dao
import (
"crypto/md5"
"encoding/hex"
"fmt"
)
// berserkerSign returns the request signature for the berserker API: the
// lowercase hex MD5 of the secret, the labelled appKey / timestamp / version
// values, and the secret again, concatenated in that order.
//
// ak is the app key, sk the secret, dt the timestamp string and ver the API
// version; dt and ver must match the values sent in the request parameters.
func (d *Dao) berserkerSign(ak, sk, dt, ver string) string {
	payload := fmt.Sprintf("%sappKey%stimestamp%sversion%s%s", sk, ak, dt, ver, sk)
	digest := md5.Sum([]byte(payload))
	return hex.EncodeToString(digest[:])
}

View File

@@ -0,0 +1,68 @@
package dao
import (
"context"
"strings"
"time"
"github.com/Dai0522/go-hash/bloomfilter"
)
// FetchMidView downloads yesterday's per-mid view records from the berserker
// HDFS endpoint (the second configured API, signed with the first configured
// key) and returns them as raw lines, one record per element.
//
// It lists the files under /<yyyymmdd>/mid/ and concatenates the lines of each
// file. If listing or loading any file fails, it returns nil and that error so
// the caller never works on a silently truncated data set. A file that comes
// back with no payload and no error is skipped.
func (d *Dao) FetchMidView(c context.Context) (result []string, err error) {
	dt := time.Now().AddDate(0, 0, -1).Format("20060102")
	dir := "/" + dt + "/mid/"
	api, key := d.c.Berserker.API[1].URL, d.c.Berserker.Keys[0]

	hdfs, err := d.scanHDFSPath(c, api, key, dir)
	if err != nil {
		return nil, err
	}
	if hdfs == nil {
		return nil, nil
	}
	for _, name := range hdfs.Result {
		// Do not use ":=" with err here: shadowing it would drop the failure.
		raw, loadErr := d.loadHDFSFile(c, api, key, dir+name)
		if loadErr != nil {
			return nil, loadErr
		}
		if raw == nil {
			// The loader can yield no payload without an error; never dereference nil.
			continue
		}
		result = append(result, strings.Split(string(*raw), "\n")...)
	}
	return result, nil
}
// FetchBuvidView downloads yesterday's per-buvid view records from the
// berserker HDFS endpoint (the second configured API, signed with the first
// configured key) and returns them as raw lines, one record per element.
//
// It lists the files under /<yyyymmdd>/buvid/ and concatenates the lines of
// each file. If listing or loading any file fails, it returns nil and that
// error so the caller never works on a silently truncated data set. A file
// that comes back with no payload and no error is skipped.
func (d *Dao) FetchBuvidView(c context.Context) (result []string, err error) {
	dt := time.Now().AddDate(0, 0, -1).Format("20060102")
	dir := "/" + dt + "/buvid/"
	api, key := d.c.Berserker.API[1].URL, d.c.Berserker.Keys[0]

	hdfs, err := d.scanHDFSPath(c, api, key, dir)
	if err != nil {
		return nil, err
	}
	if hdfs == nil {
		return nil, nil
	}
	for _, name := range hdfs.Result {
		// Do not use ":=" with err here: shadowing it would drop the failure.
		raw, loadErr := d.loadHDFSFile(c, api, key, dir+name)
		if loadErr != nil {
			return nil, loadErr
		}
		if raw == nil {
			// The loader can yield no payload without an error; never dereference nil.
			continue
		}
		result = append(result, strings.Split(string(*raw), "\n")...)
	}
	return result, nil
}
// InsertBloomFilter builds a bloom filter over svidList and stores its
// serialized form in redis under "BBQ:BF:V1:" + key.
//
// The filter is sized for len(svidList) entries with a 0.0001 target
// false-positive rate. It returns the error from creating the filter or from
// writing it to redis.
func (d *Dao) InsertBloomFilter(c context.Context, key string, svidList []uint64) error {
	filter, err := bloomfilter.New(uint64(len(svidList)), 0.0001)
	if err != nil {
		return err
	}
	for i := range svidList {
		filter.PutUint64(svidList[i])
	}
	return d.SetBloomFilter(c, "BBQ:BF:V1:"+key, filter.Serialized())
}
// SetBloomFilter writes the serialized bloom filter *b to the bloom-filter
// redis pool under key with SETEX and an expiry of 86400 seconds.
// b must not be nil. It returns the redis command error, if any.
func (d *Dao) SetBloomFilter(c context.Context, key string, b *[]byte) error {
	const ttlSeconds = 86400
	conn := d.bfredis.Get(c)
	defer conn.Close()
	if _, err := conn.Do("SETEX", []byte(key), ttlSeconds, *b); err != nil {
		return err
	}
	return nil
}

View File

@@ -0,0 +1,56 @@
package dao
import (
"context"
"go-common/app/job/bbq/recall/internal/conf"
recall "go-common/app/service/bbq/recsys-recall/api/grpc/v1"
"go-common/library/cache/redis"
xsql "go-common/library/database/sql"
"go-common/library/net/rpc/warden"
)
// Dao is the data-access object of recall-job. It bundles the configuration
// with every backing store the jobs talk to.
type Dao struct {
	c *conf.Config
	// redis is the general pool; bfredis is where bloom filters are written.
	redis, bfredis *redis.Pool
	// db serves video and tag queries, dbOffline the per-video quality stats,
	// dbCms the text tags.
	db, dbOffline, dbCms *xsql.DB
	// recallClient is the gRPC client of the recsys-recall service.
	recallClient recall.RecsysRecallClient
}
// New builds a Dao from c: two redis pools, three MySQL handles and the
// recall gRPC client taken from the "recall" entry of c.GRPCClient.
func New(c *conf.Config) (dao *Dao) {
	return &Dao{
		c:            c,
		redis:        redis.NewPool(c.Redis),
		bfredis:      redis.NewPool(c.BfRedis),
		db:           xsql.NewMySQL(c.MySQL),
		dbOffline:    xsql.NewMySQL(c.OfflineMySQL),
		dbCms:        xsql.NewMySQL(c.CmsMySQL),
		recallClient: newRecallClient(c.GRPCClient["recall"]),
	}
}
// newRecallClient dials the recsys-recall service described by cfg and returns
// a client bound to that connection.
//
// It panics when cfg is nil (the configuration has no entry for this client)
// or when dialing fails; both are start-up errors the process cannot run
// without.
func newRecallClient(cfg *conf.GRPCConfig) recall.RecsysRecallClient {
	if cfg == nil {
		// Fail with a readable message instead of a bare nil-pointer panic.
		panic("dao: gRPC client config for \"recall\" is missing")
	}
	cc, err := warden.NewClient(cfg.WardenConf).Dial(context.Background(), cfg.Addr)
	if err != nil {
		panic(err)
	}
	return recall.NewRecsysRecallClient(cc)
}
// Close releases every pool and database handle the Dao opened: both redis
// pools and all three MySQL handles. Close errors are ignored, as shutdown is
// best-effort.
func (d *Dao) Close() {
	d.redis.Close()
	d.bfredis.Close()
	d.db.Close()
	d.dbOffline.Close()
	d.dbCms.Close()
}
// Ping reports whether the main database is reachable. Only db is checked;
// the redis pools and the other database handles are not pinged.
func (d *Dao) Ping(ctx context.Context) error {
	// TODO: add mc,redis... if you use
	err := d.db.Ping(ctx)
	return err
}

View File

@@ -0,0 +1,66 @@
package dao
import (
	"context"
	"errors"
	"io/ioutil"
	"net/http"
	"net/url"
	"time"

	"go-common/app/job/bbq/recall/internal/conf"
	"go-common/app/job/bbq/recall/internal/model"

	"github.com/json-iterator/go"
)
// queryHDFS sends one signed GET request to the berserker endpoint api, asking
// for the path suffix, and returns the raw response body.
//
// The request carries appKey, timestamp, version, signMethod and sign
// parameters plus a JSON "query" parameter of the form {"fileSuffix": suffix}.
// Up to three attempts are made, waiting five seconds between attempts because
// the endpoint rate-limits callers. The wait and the request both stop early
// when c is cancelled.
//
// On success the returned pointer is non-nil and the body is non-empty. When
// all attempts fail, the result is nil and the last error is returned.
func (d *Dao) queryHDFS(c context.Context, api string, key *conf.BerserkerKey, suffix string) (result *[]byte, err error) {
	dt := time.Now().Format("2006-01-02 15:04:05")
	params := url.Values{}
	params.Set("appKey", key.AppKey)
	params.Set("timestamp", dt)
	params.Set("version", "1.0")
	params.Set("signMethod", "md5")
	params.Set("sign", d.berserkerSign(key.AppKey, key.Secret, dt, "1.0"))

	query, err := jsoniter.Marshal(struct {
		FileSuffix string `json:"fileSuffix"`
	}{FileSuffix: suffix})
	if err != nil {
		return nil, err
	}
	params.Set("query", string(query))
	target := api + "?" + params.Encode()

	for retry := 0; retry < 3; retry++ {
		if retry > 0 {
			// sleep 5s berserker limit
			select {
			case <-c.Done():
				return nil, c.Err()
			case <-time.After(5 * time.Second):
			}
		}
		// Assign to the named err (no ":=") so the last failure survives the loop.
		var req *http.Request
		if req, err = http.NewRequest(http.MethodGet, target, nil); err != nil {
			return nil, err
		}
		var resp *http.Response
		if resp, err = http.DefaultClient.Do(req.WithContext(c)); err != nil {
			continue
		}
		var body []byte
		body, err = ioutil.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			continue
		}
		if len(body) == 0 {
			err = errors.New("berserker: empty response body")
			continue
		}
		return &body, nil
	}
	return nil, err
}
// scanHDFSPath queries the berserker endpoint for the directory suffix and
// decodes the JSON response into a model.HDFSResult, whose Result field lists
// the entries found.
//
// It returns an error when the query fails, when the query yields no payload,
// or when the payload is not valid JSON for HDFSResult.
func (d *Dao) scanHDFSPath(c context.Context, api string, key *conf.BerserkerKey, suffix string) (result *model.HDFSResult, err error) {
	b, err := d.queryHDFS(c, api, key, suffix)
	if err != nil {
		return nil, err
	}
	if b == nil {
		// Guard the dereference below: the query may return no body.
		return nil, errors.New("berserker: no response for path " + suffix)
	}
	result = &model.HDFSResult{}
	if err = jsoniter.Unmarshal(*b, result); err != nil {
		return nil, err
	}
	return result, nil
}
// loadHDFSFile fetches the raw content of the file at suffix from the
// berserker endpoint. It is a thin wrapper around queryHDFS and returns that
// call's result and error unchanged.
func (d *Dao) loadHDFSFile(c context.Context, api string, key *conf.BerserkerKey, suffix string) (result *[]byte, err error) {
	result, err = d.queryHDFS(c, api, key, suffix)
	return
}

View File

@@ -0,0 +1,16 @@
package dao
import (
"context"
recall "go-common/app/service/bbq/recsys-recall/api/grpc/v1"
)
// SetInvertedIndex publishes one inverted-index block: it sends key and svids
// to the recsys-recall service through the NewIncomeVideo RPC and returns the
// RPC error, if any. The response payload is discarded.
func (d *Dao) SetInvertedIndex(c context.Context, key string, svids []int64) error {
	req := &recall.NewIncomeVideoRequest{
		Key:   key,
		SVIDs: svids,
	}
	if _, err := d.recallClient.NewIncomeVideo(c, req); err != nil {
		return err
	}
	return nil
}

View File

@@ -0,0 +1,143 @@
package dao
import (
"context"
"encoding/hex"
"fmt"
"strings"
"time"
"go-common/app/job/bbq/recall/internal/model"
"go-common/app/job/bbq/recall/proto"
"go-common/app/job/bbq/recall/proto/quality"
"go-common/library/log"
"github.com/golang/snappy"
)
// SQL statements used by the video dao.
const (
	// Earlier variant, kept for reference:
	// _fetchVideo = "select `id`, `title`, `content`, `mid`, `avid`, `cid`, `pubtime`, `ctime`, `mtime`, `duration`, `state`, `tid`, `sub_tid` from video where pubtime > ? limit ?, ?;"

	// _fetchVideo pages through the video table; parameters are offset and size.
	// NOTE(review): there is no ORDER BY, so page boundaries may not be stable
	// between calls — confirm this is acceptable.
	_fetchVideo = "select `svid`, `title`, `content`, `mid`, `avid`, `cid`, `pubtime`, `ctime`, `mtime`, `duration`, `state`, `tid`, `sub_tid` from video limit ?, ?;"
	// _fetchVideoTag loads one active tag by id.
	_fetchVideoTag = "select `id`, `name`, `type` from `tag` where `id` = ? and `status` = 1;"
	// _fetchVideoTagAll loads every active tag.
	_fetchVideoTagAll = "select `id`, `name`, `type` from `tag` where `status` = 1;"
	// _fetchVideoTextTag loads the comma-separated text tags of one video.
	_fetchVideoTextTag = "select `tag` from `video_repository` where `svid` = ? limit 1;"
	// _queryVideoQuality loads the encoded quality stats of one video.
	_queryVideoQuality = "select `stat_info` from `video_forward_index_stat_info` where `svid` = ?;"
	// _fetchNewIncomeVideo lists videos created after a point in time; %s is
	// replaced with the comma-separated list of accepted states.
	_fetchNewIncomeVideo = "select `svid` from `video` where ctime > ? and state in (%s);"
)
// FetchVideoInfo returns one page of rows from the video table: at most size
// rows starting at offset.
//
// It returns nil and the error when the query, a row scan, or the row
// iteration fails. The result is nil when the page is empty.
func (d *Dao) FetchVideoInfo(c context.Context, offset, size int) (result []*model.Video, err error) {
	rows, err := d.db.Query(c, _fetchVideo, offset, size)
	if err != nil {
		return nil, err
	}
	// Always release the rows, including on the early-return paths below.
	defer rows.Close()
	for rows.Next() {
		tmp := &model.Video{}
		if err = rows.Scan(&tmp.SVID, &tmp.Title, &tmp.Content, &tmp.MID, &tmp.AVID, &tmp.CID, &tmp.PubTime, &tmp.CTime, &tmp.MTime, &tmp.Duration, &tmp.State, &tmp.TID, &tmp.SubTID); err != nil {
			log.Error("FetchVideoInfo: %v", err)
			return nil, err
		}
		result = append(result, tmp)
	}
	// Next returns false on both end-of-data and failure; tell them apart.
	if err = rows.Err(); err != nil {
		log.Error("FetchVideoInfo: %v", err)
		return nil, err
	}
	return result, nil
}
// FetchVideoTagAll returns every active tag (status = 1) from the tag table.
//
// A row that cannot be scanned is logged and skipped without affecting the
// returned error. It returns an error when the query itself or the row
// iteration fails. The result is a non-nil, possibly empty slice on success.
func (d *Dao) FetchVideoTagAll(c context.Context) (result []*proto.Tag, err error) {
	result = make([]*proto.Tag, 0)
	rows, err := d.db.Query(c, _fetchVideoTagAll)
	if err != nil {
		return
	}
	// Always release the rows once iteration is over.
	defer rows.Close()
	for rows.Next() {
		tmp := new(proto.Tag)
		// Keep the scan error local: a skipped row must not become the
		// function's return value.
		if scanErr := rows.Scan(&tmp.TagID, &tmp.TagName, &tmp.TagType); scanErr != nil {
			log.Error("FetchVideoTagAll: %v", scanErr)
			continue
		}
		result = append(result, tmp)
	}
	// Next returns false on both end-of-data and failure; tell them apart.
	if err = rows.Err(); err != nil {
		log.Error("FetchVideoTagAll: %v", err)
	}
	return
}
// FetchVideoTag loads the active tag with id tid. The returned tag is always
// non-nil; when the lookup fails (including when no row matches) the error is
// logged and returned alongside the unfilled tag.
func (d *Dao) FetchVideoTag(c context.Context, tid int32) (result *proto.Tag, err error) {
	tag := new(proto.Tag)
	err = d.db.QueryRow(c, _fetchVideoTag, tid).Scan(&tag.TagID, &tag.TagName, &tag.TagType)
	if err != nil {
		log.Error("FetchVideoTag: %v", err)
	}
	return tag, err
}
// FetchVideoTextTag returns the text tags of video svid, read from the CMS
// database as one comma-separated string and split on ",". An empty stored
// value therefore yields a single empty element. When the lookup fails the
// error is logged and returned with a nil result.
func (d *Dao) FetchVideoTextTag(c context.Context, svid int64) (result []string, err error) {
	var joined string
	if err = d.dbCms.QueryRow(c, _fetchVideoTextTag, svid).Scan(&joined); err != nil {
		log.Errorv(c, log.KV("log", "_fetchVideoTextTag failed"), log.KV("error", err), log.KV("svid", svid))
		return nil, err
	}
	return strings.Split(joined, ","), nil
}
// FetchVideoQuality loads and decodes the quality stats of video svid from the
// offline database.
//
// The stored value is hex text wrapping a snappy-compressed protobuf; the
// function trims newlines, hex-decodes, snappy-decodes and unmarshals it.
// It returns nil, nil when the stored value is empty, and nil with the error
// when the row cannot be read (including when no row exists) or any decoding
// step fails. Decoding failures are logged.
func (d *Dao) FetchVideoQuality(c context.Context, svid uint64) (result *quality.VideoQuality, err error) {
	var raw string
	if err = d.dbOffline.QueryRow(c, _queryVideoQuality, svid).Scan(&raw); err != nil {
		return nil, err
	}
	if raw == "" {
		return nil, nil
	}
	trimed := strings.Trim(raw, "\n")
	hexDst, err := hex.DecodeString(trimed)
	if err != nil {
		log.Error("FetchVideoQuality: %v src[%s] raw[%s]", err, trimed, raw)
		return nil, err
	}
	snappyDst, err := snappy.Decode(nil, hexDst)
	if err != nil {
		log.Error("FetchVideoQuality: %v src[%s] raw[%s]", err, string(hexDst), trimed)
		return nil, err
	}
	result = &quality.VideoQuality{}
	// Capture the Unmarshal error; checking a stale err would hide corrupt data.
	if err = result.Unmarshal(snappyDst); err != nil {
		log.Error("FetchVideoQuality: %v src[%s] raw[%s]", err, snappyDst, trimed)
		return nil, err
	}
	return result, nil
}
// FetchNewincomeVideo returns the svids of recently created videos whose state
// is one of model.RecommendVideoState.
//
// The lower bound is the calendar date of 24 hours ago, formatted as
// "2006-01-02" and compared against ctime.
// NOTE(review): because only the date is passed, the window starts at the
// beginning of that day rather than exactly 24 hours ago — confirm this is
// the intended cut-off.
//
// It returns nil and the error when the query, a row scan, or the row
// iteration fails; on success the result is a non-nil, possibly empty slice.
func (d *Dao) FetchNewincomeVideo() (res []int64, err error) {
	since := time.Now().Add(-24 * time.Hour).Format("2006-01-02")
	query := fmt.Sprintf(_fetchNewIncomeVideo, strings.Join(model.RecommendVideoState, ","))
	rows, err := d.db.Query(context.Background(), query, since)
	if err != nil {
		return nil, err
	}
	// Always release the rows, including on the early-return paths below.
	defer rows.Close()
	res = make([]int64, 0)
	for rows.Next() {
		var svid int64
		if err = rows.Scan(&svid); err != nil {
			return nil, err
		}
		res = append(res, svid)
	}
	// Next returns false on both end-of-data and failure; tell them apart.
	if err = rows.Err(); err != nil {
		return nil, err
	}
	return res, nil
}

View File

@@ -0,0 +1,29 @@
# Build targets for the go-common/app/job/bbq/recall/internal/model package (tagged "automanaged").
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library for the model package (model.go).
go_library(
name = "go_default_library",
srcs = ["model.go"],
importpath = "go-common/app/job/bbq/recall/internal/model",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = ["//library/time:go_default_library"],
)
# Every file in this package; private, consumed by all-srcs below.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public handle on this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,32 @@
package model
import (
xtime "go-common/library/time"
)
// RecommendVideoState lists the state values of newly published videos that
// are admitted to the recommendation pool. The values are strings because
// they are joined into the IN (...) clause of a SQL statement.
var RecommendVideoState = []string{
	"5",
	"4",
	"3",
	"1",
	"0",
}
// HDFSResult is the JSON envelope decoded from a berserker directory listing:
// a status code, a message, and the list of entries.
type HDFSResult struct {
	// Code is the status code reported in the response.
	Code int16 `json:"code"`
	// Msg is the accompanying message.
	Msg string `json:"msg"`
	// Result holds the listed entries; callers append each one to a directory
	// path to fetch the file.
	Result []string `json:"result"`
}
// Video is one row of the video table as read by the dao; fields are scanned
// in the column order svid, title, content, mid, avid, cid, pubtime, ctime,
// mtime, duration, state, tid, sub_tid.
type Video struct {
	SVID           int64
	Title, Content string
	MID, AVID, CID int64
	// Publish, creation and modification times.
	PubTime, CTime, MTime xtime.Time
	Duration              int32
	State                 int16
	// TID and SubTID are looked up in the tag table as the first- and
	// second-level tags of the video.
	TID, SubTID int32
}

View File

@@ -0,0 +1,41 @@
# Build targets for the go-common/app/job/bbq/recall/internal/service package (tagged "automanaged").
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Go library for the service package (job implementations and scheduler wiring).
go_library(
name = "go_default_library",
srcs = [
"bloomfilter.go",
"forward_index.go",
"inverted_index.go",
"service.go",
],
importpath = "go-common/app/job/bbq/recall/internal/service",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/job/bbq/recall/internal/conf:go_default_library",
"//app/job/bbq/recall/internal/dao:go_default_library",
"//app/job/bbq/recall/proto:go_default_library",
"//library/log:go_default_library",
"//vendor/github.com/golang/snappy:go_default_library",
"//vendor/github.com/robfig/cron:go_default_library",
],
)
# Every file in this package; private, consumed by all-srcs below.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public handle on this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

View File

@@ -0,0 +1,52 @@
package service
import (
"context"
"strconv"
"strings"
"go-common/library/log"
)
// GenBloomFilter rebuilds the viewed-video bloom filters: first from the
// per-mid view records, then from the per-buvid view records. If fetching
// either data set fails, the error is logged and the job stops at that point.
func (s *Service) GenBloomFilter() {
	log.Info("run [%s]", "GenBloomFilter")
	ctx := context.Background()

	mids, err := s.dao.FetchMidView(ctx)
	if err != nil {
		log.Error("FetchMidView: %v", err)
		return
	}
	s.bloomFilter(mids)

	buvids, err := s.dao.FetchBuvidView(ctx)
	if err != nil {
		log.Error("FetchBuvidView: %v", err)
		return
	}
	s.bloomFilter(buvids)

	log.Info("finish [%s]", "GenBloomFilter")
}
// bloomFilter groups view records by user key and stores one bloom filter per
// key.
//
// Each element of result is expected to be "<key>\u0001<svid>". Lines that do
// not have exactly two fields, have an empty key, or whose svid is not a valid
// unsigned 64-bit decimal are skipped; the number of lines with an invalid
// svid is logged once. A failure to store one key's filter is logged and does
// not stop the remaining keys.
func (s *Service) bloomFilter(result []string) {
	m := make(map[string][]uint64)
	invalid := 0
	for _, line := range result {
		items := strings.Split(line, "\u0001")
		if len(items) != 2 || items[0] == "" {
			continue
		}
		// Parse as uint64 directly: an int round-trip would turn bad input
		// into svid 0 and mangle values outside the int range.
		svid, err := strconv.ParseUint(items[1], 10, 64)
		if err != nil {
			invalid++
			continue
		}
		m[items[0]] = append(m[items[0]], svid)
	}
	if invalid > 0 {
		log.Error("bloomFilter: skipped %d lines with invalid svid", invalid)
	}
	for k, v := range m {
		if err := s.dao.InsertBloomFilter(context.Background(), k, v); err != nil {
			log.Error("InsertBloomFilter: %v", err)
		}
	}
}

View File

@@ -0,0 +1,135 @@
package service
import (
"context"
"encoding/hex"
"os"
"os/exec"
"go-common/app/job/bbq/recall/proto"
"go-common/library/log"
"github.com/golang/snappy"
)
// GenForwardIndex produces the forward index file.
//
// It loads the basic info of every video, attaches each video's quality stats,
// and writes one record per line to the configured output file as the hex text
// of the snappy-compressed protobuf. A human-readable copy of each record is
// written to "<output>.bak" on a best-effort basis. Afterwards it runs the
// script "<output>.sh" and then triggers GenRealTimeInvertedIndex.
//
// The job aborts (after logging) when the video info cannot be loaded or the
// output file cannot be opened. Per-record failures are logged and the job
// continues with the next record.
func (s *Service) GenForwardIndex() {
	log.Info("run [%s]", "GenForwardIndex")
	c := context.Background()
	vInfo, err := s.videoBasicInfo(c)
	if err != nil {
		log.Error("video info: %v", err)
		return
	}

	output := s.c.Job.ForwardIndex.Output
	outputFile, err := os.OpenFile(output, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		log.Error("open file: %v", err)
		return
	}
	defer outputFile.Close()

	// The shadow copy is only a readable mirror; keep going without it when it
	// cannot be opened.
	shadowFile, err := os.OpenFile(output+".bak", os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		log.Error("open shadow file: %v", err)
		shadowFile = nil
	} else {
		defer shadowFile.Close()
	}

	for _, v := range vInfo {
		// Quality stats are optional: a video without them is still indexed,
		// so the lookup error is deliberately not treated as fatal.
		qu, _ := s.dao.FetchVideoQuality(c, v.SVID)
		tmp := &proto.ForwardIndex{
			SVID:         v.SVID,
			BasicInfo:    v,
			VideoQuality: qu,
		}
		raw, err := tmp.Marshal()
		if err != nil {
			log.Error("json marshal: %v", err)
			continue
		}
		line := hex.EncodeToString(snappy.Encode(nil, raw)) + "\n"
		if _, err = outputFile.WriteString(line); err != nil {
			log.Error("output: %v", err)
		}
		if shadowFile == nil {
			continue
		}
		if _, err = shadowFile.WriteString(tmp.String() + "\n"); err != nil {
			log.Error("shadow: %v", err)
		}
	}

	if err = exec.Command(output + ".sh").Run(); err != nil {
		log.Error("run %s.sh: %v", output, err)
	}
	log.Info("finish [GenForwardIndex]")

	s.GenRealTimeInvertedIndex()
}
// videoBasicInfo loads every video, 1000 rows per page, and converts each row
// into a proto.VideoInfo with its tags attached.
//
// Tags come from three places: the first-level tag (TID) and second-level tag
// (SubTID) are looked up by id, and the video's text tags are looked up by
// name; ids or names without a matching active tag are ignored, as is a
// failure to load a video's text tags.
//
// It returns an error when the tag table or a page of videos cannot be
// loaded; in the latter case the videos converted so far are returned with it.
func (s *Service) videoBasicInfo(c context.Context) (result []*proto.VideoInfo, err error) {
	// fetch tag info from db
	tags, err := s.dao.FetchVideoTagAll(c)
	if err != nil {
		return
	}
	byID := make(map[int32]*proto.Tag)
	byName := make(map[string]*proto.Tag)
	for _, t := range tags {
		byID[t.TagID] = t
		byName[t.TagName] = t
	}

	// fetch video info from db, one page at a time until a page comes back empty
	const size = 1000
	for offset := 0; ; offset += size {
		page, fetchErr := s.dao.FetchVideoInfo(c, offset, size)
		if fetchErr != nil {
			log.Error("FetchVideoInfo: %v", fetchErr)
			return result, fetchErr
		}
		if len(page) == 0 {
			return result, nil
		}
		log.Info("FetchVideoInfo: %v", len(result))

		for _, video := range page {
			matched := make([]*proto.Tag, 0)
			// first-level tag
			if tag, ok := byID[video.TID]; ok {
				matched = append(matched, tag)
			}
			// second-level tag
			if tag, ok := byID[video.SubTID]; ok {
				matched = append(matched, tag)
			}
			// third-level (text) tags
			if names, e := s.dao.FetchVideoTextTag(c, video.SVID); e == nil {
				for _, name := range names {
					if tag, ok := byName[name]; ok {
						matched = append(matched, tag)
					}
				}
			}

			result = append(result, &proto.VideoInfo{
				SVID:     uint64(video.SVID),
				Title:    video.Title,
				Content:  video.Content,
				MID:      uint64(video.MID),
				AVID:     uint64(video.AVID),
				CID:      uint64(video.CID),
				PubTime:  video.PubTime.Time().Unix(),
				CTime:    video.CTime.Time().Unix(),
				MTime:    video.MTime.Time().Unix(),
				Duration: uint32(video.Duration),
				State:    int32(video.State),
				Tags:     matched,
			})
		}
	}
}

View File

@@ -0,0 +1,57 @@
package service
import (
"context"
"fmt"
"go-common/library/log"
"math/rand"
"sort"
"time"
)
const (
	// _nblocks is the number of blocks the new-video list is split into.
	_nblocks = 5
	// _redisPrefix is the key format of one block; %d is the block index.
	_redisPrefix = "RECALL:NEWPUB:%d"
)
// GenRealTimeInvertedIndex publishes the real-time inverted index of newly
// created videos.
//
// It fetches the new svids, shuffles them, splits them into _nblocks blocks of
// equal size (the last block also takes every leftover element, so no video is
// dropped), and publishes each block under the key built from _redisPrefix and
// the block index. When nothing is fetched or the fetch fails, it logs and
// returns. A failure to publish one block is logged and the remaining blocks
// are still published.
func (s *Service) GenRealTimeInvertedIndex() {
	svids, err := s.dao.FetchNewincomeVideo()
	if err != nil || len(svids) == 0 {
		log.Error("GenRealTimeInvertedIndex FetchNewincomeVideo err[%v] svids[%v]", err, svids)
		return
	}

	// Shuffle by sorting on a random key drawn once per element. A comparator
	// that returns a fresh random answer on every call is not a valid ordering
	// and gives a biased result.
	rand.Seed(time.Now().Unix())
	type keyed struct {
		key  int64
		svid int64
	}
	shuffled := make([]keyed, len(svids))
	for i, id := range svids {
		shuffled[i] = keyed{key: rand.Int63(), svid: id}
	}
	sort.Slice(shuffled, func(i, j int) bool {
		return shuffled[i].key < shuffled[j].key
	})

	// Split into _nblocks blocks; the last one absorbs the whole remainder.
	per := len(shuffled) / _nblocks
	invertedIndex := make([][]int64, _nblocks)
	for i := range invertedIndex {
		lo, hi := i*per, (i+1)*per
		if i == _nblocks-1 {
			hi = len(shuffled)
		}
		block := make([]int64, 0, hi-lo)
		for _, e := range shuffled[lo:hi] {
			block = append(block, e.svid)
		}
		invertedIndex[i] = block
	}
	log.Info("GenRealTimeInvertedIndex invertedIndex[%v]", invertedIndex)

	// Publish every block under its own key.
	for i, v := range invertedIndex {
		key := fmt.Sprintf(_redisPrefix, i)
		err = s.dao.SetInvertedIndex(context.Background(), key, v)
		if err != nil {
			log.Error("GenRealTimeInvertedIndex SetInvertedIndex err[%v]", err)
		}
	}
	log.Info("finish [GenRealTimeInvertedIndex]")
}

View File

@@ -0,0 +1,64 @@
package service
import (
"context"
"go-common/app/job/bbq/recall/internal/conf"
"go-common/app/job/bbq/recall/internal/dao"
"go-common/library/log"
"github.com/robfig/cron"
)
// Service runs the recall jobs. It owns the configuration, the data-access
// object the jobs read from and write to, and the cron scheduler that
// triggers them.
type Service struct {
	// c is the configuration the service was built with.
	c *conf.Config
	// dao gives access to the databases, redis and remote services.
	dao *dao.Dao
	// sche is the cron scheduler the jobs are registered on.
	sche *cron.Cron
}
// New builds a Service from c, creating its Dao and a fresh, not yet started
// cron scheduler.
func New(c *conf.Config) (s *Service) {
	return &Service{
		c:    c,
		dao:  dao.New(c),
		sche: cron.New(),
	}
}
// InitCron registers the heartbeat (every 3 seconds), the forward-index job
// and the bloom-filter job on the scheduler, using the schedules from the
// configuration, and then starts the scheduler.
//
// A job whose schedule cannot be registered is logged and skipped; the other
// jobs are still registered and the scheduler is still started.
func (s *Service) InitCron() {
	jobs := []struct {
		name string
		spec string
		run  func()
	}{
		{"HeartBeat", "@every 3s", s.HeartBeat},
		{"GenForwardIndex", s.c.Job.ForwardIndex.Schedule, s.GenForwardIndex},
		{"GenBloomFilter", s.c.Job.BloomFilter.Schedule, s.GenBloomFilter},
	}
	for _, j := range jobs {
		// Without this check a mistyped schedule would silently disable the job.
		if err := s.sche.AddFunc(j.spec, j.run); err != nil {
			log.Error("InitCron: job(%s) schedule(%s) error(%v)", j.name, j.spec, err)
		}
	}
	s.sche.Start()
}
// RunSrv runs one job synchronously, chosen by name: the forward-index job or
// the bloom-filter job when name equals its configured job name, and a single
// heartbeat for any other name. The forward-index name is checked first.
func (s *Service) RunSrv(name string) {
	log.Info("run job{%s}", name)
	jobs := s.c.Job
	if name == jobs.ForwardIndex.JobName {
		s.GenForwardIndex()
		return
	}
	if name == jobs.BloomFilter.JobName {
		s.GenBloomFilter()
		return
	}
	s.HeartBeat()
}
// HeartBeat writes a liveness line to the log. It is scheduled periodically
// and also serves as the fallback of RunSrv.
func (s *Service) HeartBeat() {
	const msg = "alive..."
	log.Info(msg)
}
// Ping reports whether the service's data layer is reachable by delegating to
// the Dao's Ping and returning its error.
func (s *Service) Ping(ctx context.Context) (err error) {
	err = s.dao.Ping(ctx)
	return
}
// Close releases the resources held by the service by closing its Dao. The
// cron scheduler is not stopped here.
func (s *Service) Close() {
	d := s.dao
	d.Close()
}

View File

@@ -0,0 +1,64 @@
# Build targets for the go-common/app/job/bbq/recall/proto package (tagged "automanaged").
load(
"@io_bazel_rules_go//proto:def.bzl",
"go_proto_library",
)
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Protobuf description built from forward_index.proto; depends on the quality proto and gogoproto options.
proto_library(
name = "proto_proto",
srcs = ["forward_index.proto"],
tags = ["automanaged"],
deps = [
"//app/job/bbq/recall/proto/quality:quality_proto",
"@gogo_special_proto//github.com/gogo/protobuf/gogoproto",
],
)
# Go code generated from proto_proto with the gogofast compiler.
go_proto_library(
name = "proto_go_proto",
compilers = ["@io_bazel_rules_go//proto:gogofast_proto"],
importpath = "go-common/app/job/bbq/recall/proto",
proto = ":proto_proto",
tags = ["automanaged"],
deps = [
"//app/job/bbq/recall/proto/quality:quality_go_proto",
"@com_github_gogo_protobuf//gogoproto:go_default_library",
],
)
# Go library that consists only of the generated proto code.
go_library(
name = "go_default_library",
srcs = [],
embed = [":proto_go_proto"],
importpath = "go-common/app/job/bbq/recall/proto",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"//app/job/bbq/recall/proto/quality:go_default_library",
"@com_github_gogo_protobuf//gogoproto:go_default_library",
"@com_github_gogo_protobuf//proto:go_default_library",
],
)
# Every file in this package; private, consumed by all-srcs below.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# This package's sources plus those of the quality sub-package.
filegroup(
name = "all-srcs",
srcs = [
":package-srcs",
"//app/job/bbq/recall/proto/quality:all-srcs",
],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,38 @@
syntax = "proto3";
package bbq.job.recall.forwardindex;
import "github.com/gogo/protobuf/gogoproto/gogo.proto";
import "app/job/bbq/recall/proto/quality/quality.proto";
option go_package = "proto";
option (gogoproto.goproto_getters_all) = false;
option (gogoproto.goproto_stringer_all) = false;
option (gogoproto.stringer_all) = true;
// ForwardIndex is one forward-index record: a video's ID, its basic info and its quality metrics.
message ForwardIndex {
uint64 svid = 1 [(gogoproto.customname) = "SVID"]; // video ID
// Basic information about the video.
VideoInfo basic_info = 2 [(gogoproto.customname) = "BasicInfo"];
VideoQuality video_quality = 3 [(gogoproto.customname) = "VideoQuality"]; // video quality metrics (the original note said "monthly"; the VideoQuality message also carries weekly and daily fields)
}
// VideoInfo is the basic information of one video. The Go service fills it from a row of the
// video table: times are Unix seconds, and tags are the tags matched for the video.
message VideoInfo {
uint64 svid = 1 [(gogoproto.customname) = "SVID"];
string title = 2 [(gogoproto.customname) = "Title"];
string content = 3 [(gogoproto.customname) = "Content"];
uint64 mid = 4 [(gogoproto.customname) = "MID"];
uint64 avid = 5 [(gogoproto.customname) = "AVID"];
uint64 cid = 6 [(gogoproto.customname) = "CID"];
int64 pubtime = 7 [(gogoproto.customname) = "PubTime"];
int64 ctime = 8 [(gogoproto.customname) = "CTime"];
int64 mtime = 9 [(gogoproto.customname) = "MTime"];
uint32 duration = 10 [(gogoproto.customname) = "Duration"];
int32 state = 11 [(gogoproto.customname) = "State"];
repeated Tag tags = 12 [(gogoproto.customname) = "Tags"];
}
// Tag is one tag attached to a video. The Go service fills it from the tag table's
// name, type and id columns.
message Tag {
string name = 1 [(gogoproto.customname) = "TagName"];
int32 type = 2 [(gogoproto.customname) = "TagType"];
int32 ID = 3 [(gogoproto.customname) = "TagID"];
}

View File

@@ -0,0 +1,54 @@
# Build targets for the go-common/app/job/bbq/recall/proto/quality package (tagged "automanaged").
load(
"@io_bazel_rules_go//proto:def.bzl",
"go_proto_library",
)
package(default_visibility = ["//visibility:public"])
load(
"@io_bazel_rules_go//go:def.bzl",
"go_library",
)
# Protobuf description built from quality.proto; depends on the gogoproto options.
proto_library(
name = "quality_proto",
srcs = ["quality.proto"],
tags = ["automanaged"],
deps = ["@gogo_special_proto//github.com/gogo/protobuf/gogoproto"],
)
# Go code generated from quality_proto with the gogofast compiler.
go_proto_library(
name = "quality_go_proto",
compilers = ["@io_bazel_rules_go//proto:gogofast_proto"],
importpath = "go-common/app/job/bbq/recall/proto/quality",
proto = ":quality_proto",
tags = ["automanaged"],
deps = ["@com_github_gogo_protobuf//gogoproto:go_default_library"],
)
# Go library that consists only of the generated proto code.
go_library(
name = "go_default_library",
srcs = [],
embed = [":quality_go_proto"],
importpath = "go-common/app/job/bbq/recall/proto/quality",
tags = ["automanaged"],
visibility = ["//visibility:public"],
deps = [
"@com_github_gogo_protobuf//gogoproto:go_default_library",
"@com_github_gogo_protobuf//proto:go_default_library",
],
)
# Every file in this package; private, consumed by all-srcs below.
filegroup(
name = "package-srcs",
srcs = glob(["**"]),
tags = ["automanaged"],
visibility = ["//visibility:private"],
)
# Public handle on this package's sources.
filegroup(
name = "all-srcs",
srcs = [":package-srcs"],
tags = ["automanaged"],
visibility = ["//visibility:public"],
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,44 @@
syntax = "proto3";
import "github.com/gogo/protobuf/gogoproto/gogo.proto";
option go_package = "quality";
option (gogoproto.goproto_getters_all) = false;
option (gogoproto.goproto_stringer_all) = false;
option (gogoproto.stringer_all) = true;
// VideoQuality bundles the quality metrics of one video over several time windows,
// for the app itself and for the main site ("ms").
message VideoQuality {
string svid = 1; // video ID
Quality quality_info_m = 2; // monthly video quality metrics
Quality quality_info_w = 3; // weekly video quality metrics
Quality quality_info_h = 4; // daily video quality metrics (per the original note; the "_h" suffix may suggest otherwise — TODO confirm)
Quality quality_info_ms_m = 5; // monthly main-site video quality metrics
Quality quality_info_ms_w = 6; // weekly main-site video quality metrics
Quality quality_info_ms_h = 7; // daily main-site video quality metrics (same caveat as quality_info_h)
}
// Quality holds the engagement counters and derived rates of a video for one time window.
message Quality {
uint64 pub_time = 1; // publish time
uint32 play_cnt = 2; // number of plays
uint32 absolute_play_cnt = 3; // number of plays watched to the end
uint32 fav_cnt = 4; // number of favorites
uint32 like_cnt = 5; // number of likes
uint32 coin_cnt = 6; // number of coins given
uint32 share_cnt = 7; // number of shares
uint32 danmu_cnt = 8; // number of danmaku (bullet comments)
uint32 neg_eval_cnt = 9; // number of negative ratings
uint32 comment_add_cnt = 10; // number of comments
uint32 comment_like_cnt = 11; // number of comment likes
uint32 comment_report_cnt = 12; // comment replies (per the original note; the field name says "report" — TODO confirm)
double absolute_play_rate = 13; // completion rate: smoothed completed plays / plays
double like_rate = 14; // like rate: smoothed likes / plays
double share_rate = 15; // share rate: smoothed shares / plays
double reply_rate = 16; // reply rate: smoothed (comments + comment replies + comment likes) / plays
uint32 imp_cnt = 17; // number of impressions
}