|
@@ -1,5 +1,9 @@
|
|
|
package org.jeecg.modules.adweb.seo.service.dataforseo;
|
|
|
|
|
|
+import cn.hutool.json.JSONUtil;
|
|
|
+
|
|
|
+import com.google.common.collect.Lists;
|
|
|
+
|
|
|
import io.github.dataforseo.client.ApiClient;
|
|
|
import io.github.dataforseo.client.ApiException;
|
|
|
import io.github.dataforseo.client.api.SerpApi;
|
|
@@ -10,11 +14,12 @@ import jakarta.annotation.PostConstruct;
|
|
|
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
|
|
|
+import org.jeecg.common.util.RedisUtil;
|
|
|
import org.jeecg.modules.adweb.common.util.DateUtil;
|
|
|
import org.jeecg.modules.adweb.common.util.ListUtil;
|
|
|
import org.jeecg.modules.adweb.seo.entity.SeoKeywords;
|
|
|
-import org.jeecg.modules.adweb.seo.entity.SeoKeywordsSerp;
|
|
|
import org.jeecg.modules.adweb.seo.mapper.SeoKeywordsMapper;
|
|
|
+import org.jeecg.modules.adweb.seo.service.ISeoKeywordsSerpService;
|
|
|
import org.jeecg.modules.adweb.seo.service.ISeoKeywordsService;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.beans.factory.annotation.Value;
|
|
@@ -22,14 +27,23 @@ import org.springframework.stereotype.Service;
|
|
|
|
|
|
import java.util.Date;
|
|
|
import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
|
|
|
/**
|
|
|
+ * DataForSEO Serp查询 - 基于Redis同步
|
|
|
+ *
|
|
|
+ * <p>暂不使用DataForSEO callback实时更新方式,考虑稳定性
|
|
|
+ *
|
|
|
* @author wfansh
|
|
|
*/
|
|
|
@Slf4j
|
|
|
@Service
|
|
|
public class DataForSEOService {
|
|
|
|
|
|
+ private static final int MAX_TASKS_PER_SERP_REQUEST = 100;
|
|
|
+ private static final int SERP_REQUEST_CODE_SUCCESS = 20000;
|
|
|
+ private static final int SERP_TASK_CODE_SUCCESS = 20100;
|
|
|
+
|
|
|
@Value("${dataforseo.username}")
|
|
|
private String username;
|
|
|
|
|
@@ -43,14 +57,18 @@ public class DataForSEOService {
|
|
|
|
|
|
@Autowired private ISeoKeywordsService seoKeywordsService;
|
|
|
|
|
|
+ @Autowired private ISeoKeywordsSerpService seoKeywordsSerpService;
|
|
|
+
|
|
|
+ @Autowired private RedisUtil redisUtil;
|
|
|
+
|
|
|
private SerpApi serpApi;
|
|
|
|
|
|
@PostConstruct
|
|
|
private void init() {
|
|
|
ApiClient defaultClient = io.github.dataforseo.client.Configuration.getDefaultApiClient();
|
|
|
defaultClient.setBasePath(apiPath);
|
|
|
- // HTTP超时 - 30秒
|
|
|
- defaultClient.setConnectTimeout(30 * 1000);
|
|
|
+ // API超时 - 60秒
|
|
|
+ defaultClient.setConnectTimeout(60 * 1000);
|
|
|
|
|
|
// API认证方式 - basicAuth
|
|
|
HttpBasicAuth basicAuth = (HttpBasicAuth) defaultClient.getAuthentication("basicAuth");
|
|
@@ -61,50 +79,176 @@ public class DataForSEOService {
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * 从DataForSEO拉取keywords serp数据,同步到{@link SeoKeywordsSerp}表
|
|
|
+ * 全局查询更新DataForSEO keywords serp数据
|
|
|
*
|
|
|
* @param keywordType 1 - 指定词; 2 - 长尾词
|
|
|
+ * @param limit 最大查询条数
|
|
|
*/
|
|
|
- public void syncKeywordsSerp(int keywordType, int limit) throws ApiException {
|
|
|
- Date now = new Date();
|
|
|
-
|
|
|
- // 1. 查询待更新keywords
|
|
|
- List<SeoKeywords> seoKeywordsList =
|
|
|
- seoKeywordsMapper.getKeywordsToSerp(keywordType).subList(0, limit);
|
|
|
- if (ListUtil.isEmpty(seoKeywordsList)) {
|
|
|
- log.info("没有待Serp查询的关键词");
|
|
|
- return;
|
|
|
+ public void runKeywordsSerpTasks(int keywordType, int limit) {
|
|
|
+ List<SeoKeywords> seoKeywords = seoKeywordsMapper.getKeywordsToSerp(keywordType, limit);
|
|
|
+
|
|
|
+ if (ListUtil.isEmpty(seoKeywords)) {
|
|
|
+ log.info("暂无需要Serp查询的关键词");
|
|
|
+ } else {
|
|
|
+ // DataForSEO - each POST call containing no more than 100 tasks
|
|
|
+ // https://docs.dataforseo.com/v3/serp/google/organic/task_post/?bash
|
|
|
+ Lists.partition(seoKeywords, MAX_TASKS_PER_SERP_REQUEST).forEach(this::sendSerpRequest);
|
|
|
}
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 向DataForSEO发送Serp请求
|
|
|
+ *
|
|
|
+ * <p>1. 将taskId保存到Redis
|
|
|
+ *
|
|
|
+ * <p>2. 更新{@link SeoKeywords}表
|
|
|
+ */
|
|
|
+ private void sendSerpRequest(List<SeoKeywords> seoKeywords) {
|
|
|
+ try {
|
|
|
+ Date now = new Date();
|
|
|
+
|
|
|
+ // 1. 创建DataForSEO Serp查询请求,每个请求最多包含100个任务
|
|
|
+ List<SerpTaskRequestInfo> serpTaskRequestInfoList = Lists.newArrayList();
|
|
|
+ for (SeoKeywords seoKeyword :
|
|
|
+ seoKeywords.subList(
|
|
|
+ 0, Math.min(seoKeywords.size(), MAX_TASKS_PER_SERP_REQUEST))) {
|
|
|
+ SerpTaskRequestInfo serpTaskRequestInfo = new SerpTaskRequestInfo();
|
|
|
+ serpTaskRequestInfo.setKeyword(seoKeyword.getKeywords());
|
|
|
+ serpTaskRequestInfo.setSeDomain("google.com");
|
|
|
+ serpTaskRequestInfo.setLanguageCode("en");
|
|
|
+ serpTaskRequestInfo.setLocationCode(2840); // 美国
|
|
|
+ serpTaskRequestInfo.setTag(Integer.toString(seoKeyword.getId())); // tag = keywordId
|
|
|
+ serpTaskRequestInfoList.add(serpTaskRequestInfo);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 2. 发送DataForSEO Serp查询请求,验证并打印响应结果
|
|
|
+ SerpGoogleOrganicTaskPostResponseInfo serpTaskPostResponseInfo =
|
|
|
+ serpApi.googleOrganicTaskPost(serpTaskRequestInfoList);
|
|
|
+ log.info(
|
|
|
+ "创建DataForSEO Serp任务,response = {}",
|
|
|
+ JSONUtil.toJsonStr(serpTaskPostResponseInfo));
|
|
|
+ if (serpTaskPostResponseInfo.getStatusCode() != SERP_REQUEST_CODE_SUCCESS) {
|
|
|
+ log.error(serpTaskPostResponseInfo.getStatusMessage());
|
|
|
+ throw new ApiException(serpTaskPostResponseInfo.getStatusMessage());
|
|
|
+ }
|
|
|
+
|
|
|
+ // 3. 过滤状态为成功的Serp task
|
|
|
+ List<SerpGoogleOrganicTaskPostTaskInfo> serpingTasks =
|
|
|
+ serpTaskPostResponseInfo.getTasks().stream()
|
|
|
+ .filter(task -> task.getStatusCode() == SERP_TASK_CODE_SUCCESS)
|
|
|
+ .toList();
|
|
|
+ List<Integer> serpingKeywordIds = Lists.newArrayList();
|
|
|
+
|
|
|
+ // 4. 将正在查询的Serp task放进Redis
|
|
|
+ for (SerpGoogleOrganicTaskPostTaskInfo serpingTask : serpingTasks) {
|
|
|
+ Map<String, String> data = (Map<String, String>) serpingTask.getData();
|
|
|
+ String keywordId = data.get("tag");
|
|
|
+ redisUtil.set(this.getSerpTaskRedisKey(keywordId), serpingTask.getId());
|
|
|
+
|
|
|
+ serpingKeywordIds.add(Integer.parseInt(keywordId));
|
|
|
+ }
|
|
|
+
|
|
|
+ // 5. 更新SeoKeywords表
|
|
|
+ List<SeoKeywords> serpingKeywords =
|
|
|
+ seoKeywords.stream()
|
|
|
+ .filter(keyword -> serpingKeywordIds.contains(keyword.getId()))
|
|
|
+ .toList();
|
|
|
+ serpingKeywords.forEach(
|
|
|
+ seoKeyword -> {
|
|
|
+ // TODO: Why
|
|
|
+ seoKeyword.setTimerLastSearchTime(DateUtil.getTodayZeroTime(now));
|
|
|
+ // 状态 -> 正在查询
|
|
|
+ seoKeyword.setSearchStatus(1);
|
|
|
+ });
|
|
|
+ seoKeywordsService.updateBatchById(serpingKeywords);
|
|
|
+
|
|
|
+ log.info(
|
|
|
+ "{}个关键词Serp查询任务创建完成 {}",
|
|
|
+ serpingKeywords.size(),
|
|
|
+ serpingKeywords.stream().map(SeoKeywords::getId).toList());
|
|
|
+ } catch (ApiException e) {
|
|
|
+ log.error(e.getMessage(), e);
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- // 2. 发送DataForSEO Serp查询请求
|
|
|
- // List<SerpGoogleOrganicLiveAdvancedRequestInfo> serpTasks =
|
|
|
- // seoKeywordsList.stream()
|
|
|
- // .map(
|
|
|
- // seoKeyword -> {
|
|
|
- // SerpGoogleOrganicLiveAdvancedRequestInfo serpTask =
|
|
|
- // new
|
|
|
- // SerpGoogleOrganicLiveAdvancedRequestInfo();
|
|
|
- // serpTask.setKeyword(seoKeyword.getKeywords());
|
|
|
- // serpTask.setTag(Integer.toString(seoKeyword.getId()));
|
|
|
- // return serpTask;
|
|
|
- // })
|
|
|
- // .toList();
|
|
|
- //
|
|
|
- // SerpGoogleOrganicLiveAdvancedResponseInfo serpResults =
|
|
|
- // serpApi.googleOrganicLiveAdvanced(serpTasks);
|
|
|
-
|
|
|
- // 3. 更新SeoKeywords表
|
|
|
- seoKeywordsList.forEach(
|
|
|
- seoKeyword -> {
|
|
|
- // TODO: why?
|
|
|
- seoKeyword.setTimerLastSearchTime(DateUtil.getTodayZeroTime(now));
|
|
|
- // On search.
|
|
|
- seoKeyword.setSearchStatus(1);
|
|
|
- });
|
|
|
- seoKeywordsService.updateBatchById(seoKeywordsList);
|
|
|
- log.info(
|
|
|
- "{}个关键词serp查询任务创建完成 {}",
|
|
|
- seoKeywordsList.size(),
|
|
|
- seoKeywordsList.stream().map(SeoKeywords::getId).toList());
|
|
|
+ // /** 处理Serp查询结果 */
|
|
|
+ // public boolean onSerpResult(String taskId) throws Exception {
|
|
|
+ // SerpGoogleOrganicTaskGetAdvancedTaskInfo serpTask =
|
|
|
+ // serpApi.googleOrganicTaskGetAdvanced(taskId).getTasks().get(0);
|
|
|
+ // Map<String, String> data = (Map<String, String>) serpTask.getData();
|
|
|
+ // SerpGoogleOrganicTaskGetAdvancedResultInfo serpResult = serpTask.getResult().get(0);
|
|
|
+ //
|
|
|
+ // int keywordId = Integer.parseInt(data.get("tag"));
|
|
|
+ // String seDomain = serpResult.getSeDomain();
|
|
|
+ // String checkUrl = serpResult.getCheckUrl();
|
|
|
+ // Date seDatetime =
|
|
|
+ // DateUtil.parseDate(serpResult.getDatetime(),
|
|
|
+ // DateUtil.ZONED_DATE_TIME_PATTERN);
|
|
|
+ //
|
|
|
+ // // 1. 查询keyword
|
|
|
+ // SeoKeywords seoKeyword = seoKeywordsService.getById(keywordId);
|
|
|
+ // if (Objects.isNull(seoKeyword)) {
|
|
|
+ // log.info("无法获取关键词 id = {}", keywordId);
|
|
|
+ // return false;
|
|
|
+ // }
|
|
|
+ // String topPrivateDomain = CommonUtil.getTopPrivateDomain(seoKeyword.getDomain()); //
|
|
|
+ // 顶级域名
|
|
|
+ // OrganicSerpElementItem serpItem =
|
|
|
+ // serpResult.getItems().stream()
|
|
|
+ // .map(item -> (OrganicSerpElementItem) item)
|
|
|
+ // .filter(item -> item.getType().equalsIgnoreCase("organic"))
|
|
|
+ // .filter(item -> item.getDomain().contains(topPrivateDomain))
|
|
|
+ // .findAny()
|
|
|
+ // .orElse(null);
|
|
|
+ //
|
|
|
+ // // 2. 更新SeoKeywords表
|
|
|
+ // UpdateWrapper<SeoKeywords> seoKeywordsUpdateWrapper = new UpdateWrapper<>();
|
|
|
+ // seoKeywordsUpdateWrapper.eq("id", keywordId);
|
|
|
+ // // TODO: why?
|
|
|
+ // seoKeywordsUpdateWrapper.set("last_search_time", seDatetime);
|
|
|
+ // seoKeywordsUpdateWrapper.set("last_rank", 0);
|
|
|
+ // // 查询结束
|
|
|
+ // seoKeywordsUpdateWrapper.set("search_status", 0);
|
|
|
+ // if (Objects.nonNull(serpItem)) {
|
|
|
+ // seoKeywordsUpdateWrapper.set(
|
|
|
+ // "position_url", StringUtils.removeEnd(serpItem.getUrl(), "/"));
|
|
|
+ // seoKeywordsUpdateWrapper.set("last_rank", serpItem.getRankGroup());
|
|
|
+ // }
|
|
|
+ // seoKeywordsService.update(seoKeywordsUpdateWrapper);
|
|
|
+ //
|
|
|
+ // // 3. 更新SeoSerp表
|
|
|
+ // // 3.1 补充与上次更新时间之间的数据
|
|
|
+ // seoKeywordsSerpService.fillKeywordsSerpHistory(keywordId, seDatetime);
|
|
|
+ // // 3.2 更新Serp表
|
|
|
+ // SeoKeywordsSerp seoKeywordsSerp =
|
|
|
+ // seoKeywordsSerpService
|
|
|
+ // .list(
|
|
|
+ // new LambdaQueryWrapper<SeoKeywordsSerp>()
|
|
|
+ // .eq(SeoKeywordsSerp::getKeywordsId, keywordId)
|
|
|
+ // .eq(
|
|
|
+ // SeoKeywordsSerp::getSeDate,
|
|
|
+ // DateUtil.formatDate(
|
|
|
+ // seDatetime, DateUtil.DATE_PATTERN)))
|
|
|
+ // .stream()
|
|
|
+ // .findFirst()
|
|
|
+ // .orElse(new SeoKeywordsSerp());
|
|
|
+ //
|
|
|
+ // seoKeywordsSerp.setKeywordsId(keywordsId);
|
|
|
+ // seoKeywordsSerp.setSearchUrl(checkUrl);
|
|
|
+ // seoKeywordsSerp.setSeDomain(seDomain);
|
|
|
+ // seoKeywordsSerp.setLanguageCode(seoKeywords.getLang());
|
|
|
+ // seoKeywordsSerp.setType("organic_results");
|
|
|
+ // seoKeywordsSerp.setRankAbsolute(rank);
|
|
|
+ // seoKeywordsSerp.setPageNumber(rank / PAGE_SIZE + 1);
|
|
|
+ // seoKeywordsSerp.setRankType(rank);
|
|
|
+ // seoKeywordsSerp.setSeDate(DateUtil.formatDate(seDatetime, DateUtil.DATE_PATTERN));
|
|
|
+ // seoKeywordsSerp.setSeDatetime(seDatetime);
|
|
|
+ //
|
|
|
+ // seoKeywordsSerpService.saveOrUpdate(seoKeywordsSerp);
|
|
|
+ // return true;
|
|
|
+ // }
|
|
|
+
|
|
|
+ private String getSerpTaskRedisKey(String keywordId) {
|
|
|
+ return String.format("serp_task:%s", keywordId);
|
|
|
}
|
|
|
}
|