|
@@ -1,5 +1,12 @@
|
|
|
package org.jeecg.modules.adweb.seo.service.dataforseo;
|
|
|
|
|
|
+import cn.hutool.core.collection.CollectionUtil;
|
|
|
+import cn.hutool.json.JSONUtil;
|
|
|
+
|
|
|
+import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
|
|
+import com.baomidou.mybatisplus.core.conditions.update.UpdateWrapper;
|
|
|
+import com.google.common.collect.Lists;
|
|
|
+
|
|
|
import io.github.dataforseo.client.ApiClient;
|
|
|
import io.github.dataforseo.client.ApiException;
|
|
|
import io.github.dataforseo.client.api.SerpApi;
|
|
@@ -10,25 +17,44 @@ import jakarta.annotation.PostConstruct;
|
|
|
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.jeecg.modules.adweb.common.util.AdwebRedisUtil;
|
|
|
+import org.jeecg.modules.adweb.common.util.CommonUtil;
|
|
|
import org.jeecg.modules.adweb.common.util.DateUtil;
|
|
|
import org.jeecg.modules.adweb.common.util.ListUtil;
|
|
|
import org.jeecg.modules.adweb.seo.entity.SeoKeywords;
|
|
|
import org.jeecg.modules.adweb.seo.entity.SeoKeywordsSerp;
|
|
|
import org.jeecg.modules.adweb.seo.mapper.SeoKeywordsMapper;
|
|
|
+import org.jeecg.modules.adweb.seo.service.ISeoKeywordsSerpService;
|
|
|
import org.jeecg.modules.adweb.seo.service.ISeoKeywordsService;
|
|
|
+import org.jeecg.modules.adweb.site.entity.AdwebSite;
|
|
|
+import org.jeecg.modules.adweb.site.service.IAdwebSiteService;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
|
|
|
-import java.util.Date;
|
|
|
-import java.util.List;
|
|
|
+import java.util.*;
|
|
|
|
|
|
/**
|
|
|
+ * DataForSEO Serp查询 - 基于Redis定时同步
|
|
|
+ *
|
|
|
+ * <p>1. 暂不使用DataForSEO callback实时更新方式,考虑AdWeb API稳定性
|
|
|
+ *
|
|
|
+ * <p>2. 暂不使用DataForSEO serp/google/organic/tasks_ready API - 每次仅返回一个task,与文档不符
|
|
|
+ *
|
|
|
* @author wfansh
|
|
|
*/
|
|
|
@Slf4j
|
|
|
@Service
|
|
|
public class DataForSEOService {
|
|
|
// Upper bound on the number of tasks placed in a single DataForSEO task_post request.
+ private static final int MAX_TASKS_PER_SERP_REQUEST = 100;
|
|
|
// Status code this class treats as success for a response or a fetched task.
+ private static final int SERP_STATUS_CODE_SUCCESS = 20000;
|
|
|
// Per-task status code this class treats as "task created" after a task_post call.
+ private static final int SERP_STATUS_CODE_TASK_CREATED = 20100;
|
|
|
// Status codes this class treats as "task still being processed" when polling a task.
+ private static final int SERP_STATUS_CODE_TASK_HANDED = 40601;
|
|
|
+ private static final int SERP_STATUS_CODE_TASK_IN_QUEUE = 40602;
|
|
|
+
|
|
|
+ // Number of search results Google shows on one page
|
|
|
+ private static final int GOOGLE_SEARCH_PAGE_SIZE = 10;
|
|
|
|
|
|
@Value("${dataforseo.username}")
|
|
|
private String username;
|
|
@@ -39,18 +65,24 @@ public class DataForSEOService {
|
|
|
@Value("${dataforseo.api-path}")
|
|
|
private String apiPath;
|
|
|
|
|
|
+ @Autowired private IAdwebSiteService adwebSiteService;
|
|
|
+
|
|
|
@Autowired private SeoKeywordsMapper seoKeywordsMapper;
|
|
|
|
|
|
@Autowired private ISeoKeywordsService seoKeywordsService;
|
|
|
|
|
|
+ @Autowired private ISeoKeywordsSerpService seoKeywordsSerpService;
|
|
|
+
|
|
|
+ @Autowired private AdwebRedisUtil redisUtil;
|
|
|
+
|
|
|
private SerpApi serpApi;
|
|
|
|
|
|
@PostConstruct
|
|
|
private void init() {
|
|
|
ApiClient defaultClient = io.github.dataforseo.client.Configuration.getDefaultApiClient();
|
|
|
defaultClient.setBasePath(apiPath);
|
|
|
- // HTTP超时 - 30秒
|
|
|
- defaultClient.setConnectTimeout(30 * 1000);
|
|
|
+ // API超时 - 60秒
|
|
|
+ defaultClient.setConnectTimeout(60 * 1000);
|
|
|
|
|
|
// API认证方式 - basicAuth
|
|
|
HttpBasicAuth basicAuth = (HttpBasicAuth) defaultClient.getAuthentication("basicAuth");
|
|
@@ -61,50 +93,234 @@ public class DataForSEOService {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 从DataForSEO拉取keywords serp数据,同步到{@link SeoKeywordsSerp}表
|
|
|
+ * 全局查询更新DataForSEO keywords Serp数据 - 启动Serp任务并保存到Redis
|
|
|
*
|
|
|
* @param keywordType 1 - 指定词; 2 - 长尾词
|
|
|
+ * @param limit 最大查询条数
|
|
|
+ */
|
|
|
+ public void runKeywordsSerpTasks(List<String> siteCodes, int keywordType, int limit) {
|
|
|
+ if (ListUtil.isEmpty(siteCodes)) {
|
|
|
+ siteCodes =
|
|
|
+ adwebSiteService
|
|
|
+ .list(
|
|
|
+ new LambdaQueryWrapper<AdwebSite>()
|
|
|
+ .eq(AdwebSite::getStatus, 1)
|
|
|
+ .eq(AdwebSite::getRunStatus, 1))
|
|
|
+ .stream()
|
|
|
+ .map(AdwebSite::getCode)
|
|
|
+ .toList();
|
|
|
+ }
|
|
|
+
|
|
|
+ List<SeoKeywords> seoKeywords =
|
|
|
+ seoKeywordsMapper.getKeywordsToSerp(siteCodes, keywordType, limit);
|
|
|
+
|
|
|
+ if (ListUtil.isEmpty(seoKeywords)) {
|
|
|
+ log.info("暂无需要Serp查询的关键词");
|
|
|
+ } else {
|
|
|
+ // DataForSEO - each POST call containing no more than 100 tasks
|
|
|
+ // https://docs.dataforseo.com/v3/serp/google/organic/task_post
|
|
|
+ Lists.partition(seoKeywords, MAX_TASKS_PER_SERP_REQUEST).forEach(this::sendSerpRequest);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /** 同步DataForSEO Serp结果 - 从Redis中获取正在运行的任务 */
|
|
|
+ public void syncKeywordsSerpResults() {
|
|
|
+ Set<String> serpTaskRedisKeys = redisUtil.keys(this.getSerpTaskRedisKey("*"));
|
|
|
+
|
|
|
+ if (CollectionUtil.isEmpty(serpTaskRedisKeys)) {
|
|
|
+ log.info("Redis中暂无需要同步的Serp关键词");
|
|
|
+ } else {
|
|
|
+ for (String serpTaskRedisKey : serpTaskRedisKeys) {
|
|
|
+ this.onSerpResult(serpTaskRedisKey);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 向DataForSEO发送Serp请求
|
|
|
+ *
|
|
|
+ * <p>1. 将taskId保存到Redis
|
|
|
+ *
|
|
|
+ * <p>2. 更新{@link SeoKeywords}表
|
|
|
+ */
|
|
|
+ private void sendSerpRequest(List<SeoKeywords> seoKeywords) {
|
|
|
+ try {
|
|
|
+ Date now = new Date();
|
|
|
+
|
|
|
+ // 1. 创建DataForSEO Serp查询请求,每个请求最多包含100个任务
|
|
|
+ List<SerpTaskRequestInfo> serpTaskRequestInfoList = Lists.newArrayList();
|
|
|
+ for (SeoKeywords seoKeyword :
|
|
|
+ seoKeywords.subList(
|
|
|
+ 0, Math.min(seoKeywords.size(), MAX_TASKS_PER_SERP_REQUEST))) {
|
|
|
+ SerpTaskRequestInfo serpTaskRequestInfo = new SerpTaskRequestInfo();
|
|
|
+ serpTaskRequestInfo.setKeyword(seoKeyword.getKeywords());
|
|
|
+ serpTaskRequestInfo.setSeDomain("google.com");
|
|
|
+ serpTaskRequestInfo.setLanguageCode(
|
|
|
+ StringUtils.defaultIfEmpty(seoKeyword.getLang(), "en"));
|
|
|
+ serpTaskRequestInfo.setLocationCode(2840); // 美国
|
|
|
+ serpTaskRequestInfo.setTag(Integer.toString(seoKeyword.getId())); // tag = keywordId
|
|
|
+ serpTaskRequestInfoList.add(serpTaskRequestInfo);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 2. 发送DataForSEO Serp查询请求,验证并记录响应结果
|
|
|
+ SerpGoogleOrganicTaskPostResponseInfo serpTaskPostResponseInfo =
|
|
|
+ serpApi.googleOrganicTaskPost(serpTaskRequestInfoList);
|
|
|
+ log.info(
|
|
|
+ "创建DataForSEO Serp任务,response = {}",
|
|
|
+ JSONUtil.toJsonStr(serpTaskPostResponseInfo));
|
|
|
+ if (serpTaskPostResponseInfo.getStatusCode() != SERP_STATUS_CODE_SUCCESS) {
|
|
|
+ throw new ApiException(serpTaskPostResponseInfo.getStatusMessage());
|
|
|
+ }
|
|
|
+
|
|
|
+ // 3. 过滤创建成功的Serp tasks,保存到Redis
|
|
|
+ List<SerpGoogleOrganicTaskPostTaskInfo> serpingTasks =
|
|
|
+ serpTaskPostResponseInfo.getTasks().stream()
|
|
|
+ .filter(task -> task.getStatusCode() == SERP_STATUS_CODE_TASK_CREATED)
|
|
|
+ .toList();
|
|
|
+ List<Integer> serpingKeywordIds = Lists.newArrayList();
|
|
|
+
|
|
|
+ for (SerpGoogleOrganicTaskPostTaskInfo serpingTask : serpingTasks) {
|
|
|
+ Map<String, String> data = (Map<String, String>) serpingTask.getData();
|
|
|
+ String keywordId = data.get("tag");
|
|
|
+ redisUtil.set(this.getSerpTaskRedisKey(keywordId), serpingTask.getId());
|
|
|
+
|
|
|
+ serpingKeywordIds.add(Integer.parseInt(keywordId));
|
|
|
+ }
|
|
|
+
|
|
|
+ // 4. 更新seo_keywords表
|
|
|
+ List<SeoKeywords> serpingKeywords =
|
|
|
+ seoKeywords.stream()
|
|
|
+ .filter(keyword -> serpingKeywordIds.contains(keyword.getId()))
|
|
|
+ .toList();
|
|
|
+ serpingKeywords.forEach(
|
|
|
+ seoKeyword -> {
|
|
|
+ seoKeyword.setTimerLastSearchTime(now); // 定时器执行时间
|
|
|
+ seoKeyword.setSearchStatus(1); // 状态 -> 正在查询
|
|
|
+ });
|
|
|
+ seoKeywordsService.updateBatchById(serpingKeywords);
|
|
|
+
|
|
|
+ log.info(
|
|
|
+ "{}个关键词Serp查询任务创建完成 - {}",
|
|
|
+ serpingKeywords.size(),
|
|
|
+ serpingKeywords.stream().map(SeoKeywords::getId).toList());
|
|
|
+ } catch (ApiException e) {
|
|
|
+ log.error("创建DataForSEO Serp任务失败", e);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 处理DataForSEO Serp查询结果
|
|
|
+ *
|
|
|
+ * <p>1. 更新{@link SeoKeywords}表
|
|
|
+ *
|
|
|
+ * <p>2. 更新{@link SeoKeywordsSerp}表
|
|
|
+ *
|
|
|
+ * <p>3. 从Redis删除taskId
|
|
|
*/
|
|
|
- public void syncKeywordsSerp(int keywordType, int limit) throws ApiException {
|
|
|
- Date now = new Date();
|
|
|
-
|
|
|
- // 1. 查询待更新keywords
|
|
|
- List<SeoKeywords> seoKeywordsList =
|
|
|
- seoKeywordsMapper.getKeywordsToSerp(keywordType).subList(0, limit);
|
|
|
- if (ListUtil.isEmpty(seoKeywordsList)) {
|
|
|
- log.info("没有待Serp查询的关键词");
|
|
|
- return;
|
|
|
+ private boolean onSerpResult(String serpTaskRedisKey) {
|
|
|
+ try {
|
|
|
+ // 1. 查询Serp task
|
|
|
+ String taskId = redisUtil.getString(serpTaskRedisKey);
|
|
|
+ SerpGoogleOrganicTaskGetRegularTaskInfo serpTask =
|
|
|
+ serpApi.googleOrganicTaskGetRegular(taskId).getTasks().get(0);
|
|
|
+ log.info("获取DataForSEO Serp任务,response = {}", JSONUtil.toJsonStr(serpTask));
|
|
|
+ if (serpTask.getStatusCode() != SERP_STATUS_CODE_SUCCESS) {
|
|
|
+ log.info(
|
|
|
+ "DataForSEO Serp任务 {} 状态为 {} {}",
|
|
|
+ taskId,
|
|
|
+ serpTask.getStatusCode(),
|
|
|
+ serpTask.getStatusMessage());
|
|
|
+ // Serp task正在处理中...
|
|
|
+ if (Arrays.asList(SERP_STATUS_CODE_TASK_HANDED, SERP_STATUS_CODE_TASK_IN_QUEUE)
|
|
|
+ .contains(serpTask.getStatusCode())) {
|
|
|
+ return true;
|
|
|
+ }
|
|
|
+ throw new ApiException(serpTask.getStatusMessage());
|
|
|
+ }
|
|
|
+ SerpGoogleOrganicTaskGetRegularResultInfo serpResult = serpTask.getResult().get(0);
|
|
|
+
|
|
|
+ // 2. 查询seo_keywords表,根据域名过滤Serp result items
|
|
|
+ int keywordId = Integer.parseInt(this.getKeywordIdFromRedisKey(serpTaskRedisKey));
|
|
|
+ SeoKeywords seoKeyword = seoKeywordsService.getById(keywordId);
|
|
|
+ if (Objects.isNull(seoKeyword)) {
|
|
|
+ log.info("无法获取关键词 ID = {}", keywordId);
|
|
|
+
|
|
|
+ redisUtil.del(serpTaskRedisKey);
|
|
|
+ return false;
|
|
|
+ }
|
|
|
+
|
|
|
+ String topPrivateDomain =
|
|
|
+ CommonUtil.getTopPrivateDomain(seoKeyword.getDomain()); // 顶级域名
|
|
|
+ OrganicSerpElementItem serpItem =
|
|
|
+ serpResult.getItems().stream()
|
|
|
+ .filter(OrganicSerpElementItem.class::isInstance)
|
|
|
+ .map(OrganicSerpElementItem.class::cast)
|
|
|
+ .filter(item -> item.getDomain().contains(topPrivateDomain)) // 根据域名匹配
|
|
|
+ .findAny()
|
|
|
+ .orElse(null);
|
|
|
+
|
|
|
+ // 读取Serp相关数据
|
|
|
+ Date seDatetime =
|
|
|
+ DateUtil.parseDate(serpResult.getDatetime(), DateUtil.ZONED_DATE_TIME_PATTERN);
|
|
|
+ String positionUrl =
|
|
|
+ Objects.nonNull(serpItem)
|
|
|
+ ? StringUtils.removeEnd(serpItem.getUrl(), "/")
|
|
|
+ : null;
|
|
|
+ int rankGroup = Objects.nonNull(serpItem) ? serpItem.getRankGroup() : 0;
|
|
|
+ int rankAbsolute = Objects.nonNull(serpItem) ? serpItem.getRankAbsolute() : 0;
|
|
|
+
|
|
|
+ // 3.更新seo_keywords表
|
|
|
+ UpdateWrapper<SeoKeywords> seoKeywordsUpdateWrapper = new UpdateWrapper<>();
|
|
|
+ seoKeywordsUpdateWrapper.eq("id", keywordId);
|
|
|
+ seoKeywordsUpdateWrapper.set("last_search_time", seDatetime);
|
|
|
+ seoKeywordsUpdateWrapper.set("position_url", positionUrl);
|
|
|
+ seoKeywordsUpdateWrapper.set("last_rank", rankGroup);
|
|
|
+ seoKeywordsUpdateWrapper.set("search_status", 0); // 状态 -> 查询结束
|
|
|
+ seoKeywordsService.update(seoKeywordsUpdateWrapper);
|
|
|
+
|
|
|
+ // 4. 更新seo_keywords_serp表
|
|
|
+ // 4.1 填充与上次更新时间之间的数据, 截止到seDatetime的前一天
|
|
|
+ seoKeywordsSerpService.fillKeywordsSerpHistory(keywordId, seDatetime);
|
|
|
+ // 4.2 更新Serp表
|
|
|
+ SeoKeywordsSerp keywordSerp =
|
|
|
+ seoKeywordsSerpService
|
|
|
+ // 如果seDatetime当天有数据,则覆盖
|
|
|
+ .list(
|
|
|
+ new LambdaQueryWrapper<SeoKeywordsSerp>()
|
|
|
+ .eq(SeoKeywordsSerp::getKeywordsId, keywordId)
|
|
|
+ .eq(
|
|
|
+ SeoKeywordsSerp::getSeDate,
|
|
|
+ DateUtil.formatDate(
|
|
|
+ seDatetime, DateUtil.DATE_PATTERN)))
|
|
|
+ .stream()
|
|
|
+ .findFirst()
|
|
|
+ .orElse(new SeoKeywordsSerp());
|
|
|
+
|
|
|
+ keywordSerp.setKeywordsId(keywordId);
|
|
|
+ keywordSerp.setSearchUrl(serpResult.getCheckUrl());
|
|
|
+ keywordSerp.setSeDomain(serpResult.getSeDomain());
|
|
|
+ keywordSerp.setLanguageCode(serpResult.getLanguageCode());
|
|
|
+ keywordSerp.setType("organic_results");
|
|
|
+ keywordSerp.setPageNumber(rankGroup > 0 ? rankGroup / GOOGLE_SEARCH_PAGE_SIZE + 1 : 0);
|
|
|
+ keywordSerp.setRankGroup(rankGroup);
|
|
|
+ keywordSerp.setRankAbsolute(rankAbsolute);
|
|
|
+ keywordSerp.setSeDate(DateUtil.formatDateStr(seDatetime, DateUtil.DATE_PATTERN));
|
|
|
+ keywordSerp.setSeDatetime(seDatetime);
|
|
|
+ seoKeywordsSerpService.save(keywordSerp);
|
|
|
+
|
|
|
+ redisUtil.del(serpTaskRedisKey);
|
|
|
+ return true;
|
|
|
+ } catch (ApiException e) {
|
|
|
+ log.error("同步DataForSEO Serp任务失败", e);
|
|
|
+ return false;
|
|
|
}
|
|
|
+ }
|
|
|
+
|
|
|
+ private String getSerpTaskRedisKey(String keywordId) {
|
|
|
+ return String.format("serp_task:%s", keywordId);
|
|
|
+ }
|
|
|
|
|
|
- // 2. 发送DataForSEO Serp查询请求
|
|
|
- // List<SerpGoogleOrganicLiveAdvancedRequestInfo> serpTasks =
|
|
|
- // seoKeywordsList.stream()
|
|
|
- // .map(
|
|
|
- // seoKeyword -> {
|
|
|
- // SerpGoogleOrganicLiveAdvancedRequestInfo serpTask =
|
|
|
- // new
|
|
|
- // SerpGoogleOrganicLiveAdvancedRequestInfo();
|
|
|
- // serpTask.setKeyword(seoKeyword.getKeywords());
|
|
|
- // serpTask.setTag(Integer.toString(seoKeyword.getId()));
|
|
|
- // return serpTask;
|
|
|
- // })
|
|
|
- // .toList();
|
|
|
- //
|
|
|
- // SerpGoogleOrganicLiveAdvancedResponseInfo serpResults =
|
|
|
- // serpApi.googleOrganicLiveAdvanced(serpTasks);
|
|
|
-
|
|
|
- // 3. 更新SeoKeywords表
|
|
|
- seoKeywordsList.forEach(
|
|
|
- seoKeyword -> {
|
|
|
- // TODO: why?
|
|
|
- seoKeyword.setTimerLastSearchTime(DateUtil.getTodayZeroTime(now));
|
|
|
- // On search.
|
|
|
- seoKeyword.setSearchStatus(1);
|
|
|
- });
|
|
|
- seoKeywordsService.updateBatchById(seoKeywordsList);
|
|
|
- log.info(
|
|
|
- "{}个关键词serp查询任务创建完成 {}",
|
|
|
- seoKeywordsList.size(),
|
|
|
- seoKeywordsList.stream().map(SeoKeywords::getId).toList());
|
|
|
+ private String getKeywordIdFromRedisKey(String serpTaskRedisKey) {
|
|
|
+ return serpTaskRedisKey.split(":")[1];
|
|
|
}
|
|
|
}
|