go-common/app/service/live/recommend/internal/dao/dao.go

466 lines
11 KiB
Go
Raw Normal View History

2019-04-22 10:49:16 +00:00
package dao
import (
"context"
"fmt"
"sort"
"strconv"
"strings"
"time"
"github.com/pkg/errors"
"go-common/app/service/live/recommend/internal/conf"
"go-common/app/service/live/recommend/recconst"
relation_api "go-common/app/service/live/relation/api/liverpc"
room_api "go-common/app/service/live/room/api/liverpc"
"go-common/library/cache/redis"
"go-common/library/log"
"go-common/library/net/rpc/liverpc"
)
var _userRecCandidateKey = "rec_candidate_%d"
var _recommendOffsetKey = "rec_offset_%d"
// 已经推荐过的池子,用户+日期
var _recommendedKey = "recommended_%d_%s"
// RoomAPI room liverpc client
var RoomAPI *room_api.Client
// RelationAPI relation liverpc client
var RelationAPI *relation_api.Client
// Dao dao
type Dao struct {
c *conf.Config
redis *redis.Pool
}
func init() {
RoomAPI = room_api.New(getConf("room"))
RelationAPI = relation_api.New(getConf("relation"))
}
func getConf(appName string) *liverpc.ClientConfig {
c := conf.Conf.LiveRpc
if c != nil {
return c[appName]
}
return nil
}
// ClearRecommend 清空该用户相关的推荐缓存
func (d *Dao) ClearRecommend(ctx context.Context, uid int64) error {
candidateKey := fmt.Sprintf(_userRecCandidateKey, uid)
recommendedKey := fmt.Sprintf(_recommendedKey, uid, time.Now().Format("20060102"))
offsetKey := fmt.Sprintf(_recommendOffsetKey, uid)
conn := d.redis.Get(ctx)
defer conn.Close()
_, err := conn.Do("DEL", candidateKey, recommendedKey, offsetKey)
return errors.WithStack(err)
}
// New init mysql db
func New(c *conf.Config) (dao *Dao) {
dao = &Dao{
c: c,
redis: redis.NewPool(c.Redis),
}
return
}
// Close close the resource.
func (d *Dao) Close() {
d.redis.Close()
}
func (d *Dao) saveOffset(conn redis.Conn, uid int64, offset int) {
conn.Do("SETEX", fmt.Sprintf(_recommendOffsetKey, uid), 86400, offset)
}
func (d *Dao) addToRecommended(conn redis.Conn, uid int64, ids []int64) {
if len(ids) == 0 {
return
}
day := time.Now().Format("20060102")
key := fmt.Sprintf(_recommendedKey, uid, day)
var is []interface{}
is = append(is, key)
for _, id := range ids {
is = append(is, id)
}
conn.Send("EXPIRE", key, 86400)
conn.Send("SADD", is...)
conn.Flush()
conn.Receive()
_, err := conn.Receive()
if err != nil {
log.Info("addToRecommended error +%v", err)
}
}
// GetRandomRoomIds 随机获取count个推荐
// 如果总数量total比count小则返回total个
func (d *Dao) GetRandomRoomIds(ctx context.Context, uid int64, reqCount int, existRoomIDs []int64) (ret []int64, err error) {
if reqCount == 0 {
return
}
var (
candidateLen int
)
r := d.redis.Get(ctx)
defer r.Close()
candidateKey := fmt.Sprintf(_userRecCandidateKey, uid)
exists, err := redis.Int(r.Do("exists", candidateKey))
if err != nil {
err = errors.WithStack(err)
return
}
existMap := map[int64]struct{}{}
for _, id := range existRoomIDs {
existMap[id] = struct{}{}
}
if exists == 0 {
var candidate []int64
var currentOffset = 0
candidate, err = d.generateLrCandidateList(r, uid, candidateKey)
if err != nil {
return
}
Loop:
for len(ret) < reqCount && currentOffset < len(candidate) {
var tmp []int64
if len(candidate)-currentOffset < int(reqCount) {
tmp = candidate[currentOffset:]
} else {
tmp = candidate[currentOffset : currentOffset+reqCount]
}
//去重
for _, id := range tmp {
_, ok := existMap[id]
currentOffset += 1
if !ok {
ret = append(ret, id)
if len(ret) >= int(reqCount) {
break Loop
}
}
}
}
d.addToRecommended(r, uid, ret)
d.saveOffset(r, uid, currentOffset)
} else {
candidateLen, err = redis.Int(r.Do("LLEN", candidateKey))
if err != nil {
return
}
var offset int
offset, _ = redis.Int(r.Do("GET", fmt.Sprintf(_recommendOffsetKey, uid)))
if offset > (candidateLen - 1) {
return
}
var currentOffset = offset
Loop2:
for len(ret) < reqCount && currentOffset < candidateLen {
var ids []int64
ids, err = redis.Int64s(r.Do("LRANGE", candidateKey, currentOffset, currentOffset+reqCount-1))
if err != nil {
err = errors.WithStack(err)
return
}
// 去重
for _, id := range ids {
currentOffset++
_, ok := existMap[id]
if !ok {
ret = append(ret, id)
if len(ret) >= int(reqCount) {
break Loop2
}
}
}
if len(ids) == 0 {
log.Error("Cannot get recommend candidate, key=%s, offset=%d, count=%d", candidateKey, offset, reqCount)
break
}
}
d.addToRecommended(r, uid, ret)
d.saveOffset(r, uid, currentOffset)
}
return
}
// GetLrRecRoomIds 在GetRandomRoomIds的基础上进行LR计算并返回倒排的房间号列表
// 与GetRandomRoomIds有相同的输入输出结构
func (d *Dao) GetLrRecRoomIds(r redis.Conn, uid int64, candidateIds []int64) (ret []int64, err error) {
var areas string
areaIds := map[int64]struct{}{}
areas, err = redis.String(r.Do("GET", fmt.Sprintf(recconst.UserAreaKey, uid)))
if err != nil && err != redis.ErrNil {
log.Error("redis GET error: %v", err)
return
}
err = nil
if areas != "" {
split := strings.Split(areas, ";")
for _, areaIdStr := range split {
areaId, _ := strconv.ParseInt(areaIdStr, 10, 64)
areaIds[areaId] = struct{}{}
}
}
weightVector := makeWeightVec(d.c)
roomFeatures, ok := roomFeatureValue.Load().(map[int64][]int64)
if !ok {
ret = candidateIds
return
}
roomScoreSlice := ScoreSlice{}
for _, roomId := range candidateIds {
if fv, ok := roomFeatures[roomId]; ok {
featureVector := make([]int64, len(fv))
copy(featureVector, fv)
areaId := featureVector[0]
if _, ok := areaIds[areaId]; ok {
featureVector[0] = 1
} else {
featureVector[0] = 0
}
counter := Counter{roomId: roomId, score: calcScore(weightVector, featureVector)}
roomScoreSlice = append(roomScoreSlice, counter)
}
}
sort.Sort(roomScoreSlice)
for _, counter := range roomScoreSlice {
ret = append(ret, counter.roomId)
}
return
}
// generateCandidateList 得到候选集
func (d *Dao) generateCandidateList(r redis.Conn, uid int64, candidateKey string) (ret []int64, err error) {
// 第一步 itemcf优先级最高。
itemCFKey := fmt.Sprintf(recconst.UserItemCFRecKey, uid)
var itemCFList []int64
itemCFList, err = redis.Int64s(r.Do("ZREVRANGE", itemCFKey, 0, -1))
if err != nil {
err = errors.WithStack(err)
return
}
itemCFOnlineIds := d.FilterOnlineRoomIds(itemCFList)
if len(itemCFOnlineIds) == 0 {
log.Info("No item-cf room online for user, uid=%d, before online filter room ids: %+v", uid, itemCFList)
}
// 第二步 取兴趣分区的房间 人气超过100的房间
var areas string
areas, err = redis.String(r.Do("GET", fmt.Sprintf(recconst.UserAreaKey, uid)))
if err != nil && err != redis.ErrNil {
err = errors.WithStack(err)
return
}
err = nil
var areaRoomIDs []int64
if areas != "" {
split := strings.Split(areas, ";")
for _, areaIdStr := range split {
areaId, _ := strconv.ParseInt(areaIdStr, 10, 64)
var ids = d.getAreaRoomIds(areaId)
areaRoomIDs = append(areaRoomIDs, ids...)
}
}
// 第三步 取兴趣分区大分区的100个 先不做
// 第四步 减去已经推荐过的
day := time.Now().Format("20060102")
var recommendedList []int64
edKey := fmt.Sprintf(_recommendedKey, uid, day)
recommendedList, err = redis.Int64s(r.Do("SMEMBERS", edKey))
if err != nil {
err = errors.WithStack(err)
return
}
recommended := map[int64]struct{}{}
for _, id := range recommendedList {
recommended[id] = struct{}{}
}
var itemCFFinalIDs []int64
for _, id := range itemCFOnlineIds {
_, exist := recommended[id]
if !exist {
itemCFFinalIDs = append(itemCFFinalIDs, id)
}
}
var areaRoomFinalIDs []int64
for _, id := range areaRoomIDs {
_, exist := recommended[id]
if !exist {
areaRoomFinalIDs = append(areaRoomFinalIDs, id)
}
}
ret = mergeArr(itemCFFinalIDs, areaRoomFinalIDs)
log.Info("UserRecommend : uid=%d total=%d, "+
"itemcf.original=%d, itemcf.online=%d, itemcf.noviewd=%d, "+
"areaRoom.original=%d, itemcf.noviewd=%d viewed=%d",
uid, len(ret), len(itemCFList), len(itemCFOnlineIds), len(itemCFFinalIDs),
len(areaRoomIDs), len(areaRoomFinalIDs), len(recommendedList))
return
}
// generateCandidateList 得到进过LR的候选集
func (d *Dao) generateLrCandidateList(r redis.Conn, uid int64, candidateKey string) (ret []int64, err error) {
roomIDs, err := d.generateCandidateList(r, uid, candidateKey)
if err != nil {
log.Error("generateLrCandidateList failed 1, error:%v", err)
return
}
if len(ret) > 0 {
ret, err = d.GetLrRecRoomIds(r, uid, roomIDs)
if err != nil {
log.Error("generateLrCandidateList failed 2, error:%v", err)
return
}
}
// 召回源不足的情况下补足推荐房间数
if len(ret) < 150 {
ids, ok := recDefaultRoomIds.Load().([]int64)
if !ok {
return
}
ret1, err1 := d.GetLrRecRoomIds(r, uid, ids)
if err1 != nil {
log.Error("generateLrCandidateList failed 3, error:%v", err1)
return
}
ret = mergeArrWithOrder(ret, ret1, 150) // TODO:当前ret1的结果是没有过滤掉今天看过的房间的, 看后面是否需要优化
}
{
for _, roomID := range ret {
r.Send("RPUSH", candidateKey, roomID)
}
r.Send("EXPIRE", candidateKey, 60*2)
err = r.Flush()
if err != nil {
err = errors.WithStack(err)
return
}
for i := 0; i < len(ret)+1; i++ {
r.Receive()
}
}
return
}
// Ping dao ping
func (d *Dao) Ping(ctx context.Context) (err error) {
conn := d.redis.Get(ctx)
defer conn.Close()
_, err = conn.Do("ping")
if err != nil {
err = errors.Wrap(err, "dao Ping err")
}
return err
}
// Counter 房间-分数结构体, 用于构建一个可排序的slice
type Counter struct {
roomId int64
score float32
}
// ScoreSlice Counter对象的slice
type ScoreSlice []Counter
func (s ScoreSlice) Len() int {
return len(s)
}
func (s ScoreSlice) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
func (s ScoreSlice) Less(i, j int) bool {
return s[j].score < s[i].score
}
func calcScore(weightVector []float32, featureVector []int64) (score float32) {
if len(weightVector) != len(featureVector) {
panic(fmt.Sprintf("权重数量和特征数量不匹配, 请检查配置或逻辑, weight: %+v, feature: %+v", weightVector, featureVector))
}
for i := 0; i < min(len(weightVector), len(featureVector)); i++ {
score += weightVector[i] * float32(featureVector[i])
}
return
}
func min(x int, y int) int {
if x < y {
return x
}
return y
}
// 合并两个集合
func mergeArr(x []int64, y []int64) (ret []int64) {
tmpMap := map[int64]struct{}{}
for _, id := range x {
tmpMap[id] = struct{}{}
}
for _, id := range y {
tmpMap[id] = struct{}{}
}
for id := range tmpMap {
ret = append(ret, id)
}
return
}
// 按x, y的顺序合并两个集合, 当x的长度不小于limit则直接返回
func mergeArrWithOrder(x []int64, y []int64, limit int) (ret []int64) {
if len(x) >= limit {
ret = x
return
}
tmpMap := map[int64]struct{}{}
ret = append(ret, x...)
num := len(ret)
for _, id := range x {
tmpMap[id] = struct{}{}
}
for _, id := range y {
if _, ok := tmpMap[id]; ok {
continue
}
num += 1
tmpMap[id] = struct{}{}
ret = append(ret, id)
if num >= limit {
break
}
}
return
}
func makeWeightVec(c *conf.Config) (ret []float32) {
ret = append(ret, c.CommonFeature.UserAreaInterest.Weights...)
ret = append(ret, c.CommonFeature.FansNum.Weights...)
ret = append(ret, c.CommonFeature.CornerSign.Weights...)
ret = append(ret, c.CommonFeature.Online.Weights...)
return
}