Răsfoiți Sursa

Merge branch 'dataforseo' of wangfan/adweb3-server into master

wangfan 5 luni în urmă
părinte
comite
034f94a3ab

+ 34 - 0
jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/adweb/common/util/CommonUtil.java

@@ -0,0 +1,34 @@
+package org.jeecg.modules.adweb.common.util;
+
+import com.google.common.net.InternetDomainName;
+
+import lombok.extern.slf4j.Slf4j;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+
+/**
+ * Common utility helpers.
+ *
+ * @author wfansh
+ */
+@Slf4j
+public class CommonUtil {
+
+    /**
+     * Resolves the top private domain (the part one level beneath the public suffix) of a URL or
+     * bare host name.
+     *
+     * @param url e.g. https://www.essaynerdie.com, www.essaynerdie.com,
+     *     https://www.essaynerdie.com/products
+     * @return the top private domain, e.g. essaynerdie.com
+     * @throws IllegalArgumentException if the resolved host is not a syntactically valid domain
+     * @throws IllegalStateException if the resolved host is not under a known public suffix
+     */
+    public static String getTopPrivateDomain(String url) {
+        String host = url;
+        try {
+            host = new URL(url).getHost();
+        } catch (MalformedURLException ignored) {
+            // Not a complete URL (e.g. no protocol): treat the raw input as the host name.
+        }
+        // Resolved after the try/catch rather than inside a finally block: a return inside
+        // finally would silently discard any throwable raised while parsing the URL.
+        return InternetDomainName.from(host).topPrivateDomain().toString();
+    }
+}

+ 36 - 14
jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/adweb/common/util/DateUtil.java

@@ -1,9 +1,7 @@
 package org.jeecg.modules.adweb.common.util;
 
-
 import com.xkcoding.http.util.StringUtil;
 
-
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
 import java.time.LocalDate;
@@ -20,16 +18,16 @@ import java.util.*;
  */
 public class DateUtil {
 
-    /**
-     * 时间格式(yyyy-MM-dd)
-     */
-    public final static String DATE_PATTERN = "yyyy-MM-dd";
-    /**
-     * 时间格式(yyyy-MM-dd HH:mm:ss)
-     */
-    public final static String DATE_TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";
+    /** 时间格式(yyyy-MM-dd) */
+    public static final String DATE_PATTERN = "yyyy-MM-dd";
 
-    public final static String SUBJECT_DATE = "yyyy/MM/dd";
+    /** 时间格式(yyyy-MM-dd HH:mm:ss) */
+    public static final String DATE_TIME_PATTERN = "yyyy-MM-dd HH:mm:ss";
+
+    /**
+     * Date-time pattern with a zone offset (yyyy-MM-dd HH:mm:ss +00:00).
+     *
+     * <p>{@code XXX} is the ISO 8601 {@code +hh:mm} offset form; a single {@code X} only covers
+     * the hour part ({@code +00}) and would not match the documented format.
+     */
+    public static final String ZONED_DATE_TIME_PATTERN = "yyyy-MM-dd HH:mm:ss XXX";
+
+    public static final String SUBJECT_DATE = "yyyy/MM/dd";
 
     public static final ZoneId DEFAULT_ZONE_ID = ZoneId.of("Asia/Shanghai");
 
@@ -133,9 +131,7 @@ public class DateUtil {
      *
      * @param strDate String
      * @param strFormat String
-     *
      * @return FormatDate
-     *
      * @throws ParseException ParseException
      */
     public static Date getFormatDate(String strDate, String strFormat) throws ParseException {
@@ -153,7 +149,6 @@ public class DateUtil {
      *
      * @param date Date
      * @param toFormat String
-     *
      * @return FormatDate String
      */
     public static String dateToString(Date date, String toFormat) {
@@ -180,4 +175,31 @@ public class DateUtil {
                         LocalDate.ofInstant(end.toInstant(), DEFAULT_ZONE_ID),
                         LocalDate.ofInstant(start.toInstant(), DEFAULT_ZONE_ID));
     }
+
+    /**
+     * Formats a {@link Date} as text using the given pattern.
+     *
+     * <p>A new {@link SimpleDateFormat} is created on every call, so no formatter instance is
+     * shared between callers. NOTE(review): the formatter is built without an explicit time zone,
+     * so it presumably uses the JVM default rather than {@code DEFAULT_ZONE_ID} - confirm.
+     *
+     * @param date the date to format
+     * @param format a {@link SimpleDateFormat} pattern, e.g. {@code yyyy-MM-dd}
+     * @return the formatted date string
+     */
+    public static String formatDate(Date date, String format) {
+        final SimpleDateFormat formatter = new SimpleDateFormat(format);
+        formatter.setLenient(false);
+        final String formatted = formatter.format(date);
+        return formatted;
+    }
+
+    /**
+     * Parses text into a {@link Date} using the given pattern, with lenient parsing disabled.
+     *
+     * <p>A new {@link SimpleDateFormat} is created on every call, so no parser instance is shared
+     * between callers. Parsing starts at the beginning of the text and may not consume all of it.
+     *
+     * @param dateStr the text to parse
+     * @param format a {@link SimpleDateFormat} pattern, e.g. {@code yyyy-MM-dd HH:mm:ss}
+     * @return the parsed date
+     * @throws ParseException if the beginning of {@code dateStr} cannot be parsed
+     */
+    public static Date parseDate(String dateStr, String format) throws ParseException {
+        final SimpleDateFormat parser = new SimpleDateFormat(format);
+        parser.setLenient(false);
+        final Date parsed = parser.parse(dateStr);
+        return parsed;
+    }
 }

+ 1 - 1
jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/adweb/seo/mapper/SeoKeywordsMapper.java

@@ -64,6 +64,6 @@ public interface SeoKeywordsMapper extends BaseMapper<SeoKeywords> {
      *
      * @return 关键词列表
      */
-    List<SeoKeywords> getKeywordsToSerp(int keywordType);
+    List<SeoKeywords> getKeywordsToSerp(int keywordType, int limit);
 
 }

+ 3 - 0
jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/adweb/seo/mapper/xml/SeoKeywordsMapper.xml

@@ -168,6 +168,8 @@
         seo_keywords
         WHERE
         `status` = 1
+        <!-- skip keywords whose SERP lookup is in progress (search_status = 1); NULL counts as not in progress -->
+        AND IFNULL(`search_status`, 0) != 1
         AND site_code IN (
         SELECT
         `code`
@@ -181,5 +183,6 @@
             AND keyword_type = #{keywordType}
         </if>
         AND last_search_time <![CDATA[ <]]> CURDATE()
+        LIMIT #{limit}
     </select>
 </mapper>

+ 187 - 43
jeecg-module-system/jeecg-system-biz/src/main/java/org/jeecg/modules/adweb/seo/service/dataforseo/DataForSEOService.java

@@ -1,5 +1,9 @@
 package org.jeecg.modules.adweb.seo.service.dataforseo;
 
+import cn.hutool.json.JSONUtil;
+
+import com.google.common.collect.Lists;
+
 import io.github.dataforseo.client.ApiClient;
 import io.github.dataforseo.client.ApiException;
 import io.github.dataforseo.client.api.SerpApi;
@@ -10,11 +14,12 @@ import jakarta.annotation.PostConstruct;
 
 import lombok.extern.slf4j.Slf4j;
 
+import org.jeecg.common.util.RedisUtil;
 import org.jeecg.modules.adweb.common.util.DateUtil;
 import org.jeecg.modules.adweb.common.util.ListUtil;
 import org.jeecg.modules.adweb.seo.entity.SeoKeywords;
-import org.jeecg.modules.adweb.seo.entity.SeoKeywordsSerp;
 import org.jeecg.modules.adweb.seo.mapper.SeoKeywordsMapper;
+import org.jeecg.modules.adweb.seo.service.ISeoKeywordsSerpService;
 import org.jeecg.modules.adweb.seo.service.ISeoKeywordsService;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Value;
@@ -22,14 +27,23 @@ import org.springframework.stereotype.Service;
 
 import java.util.Date;
 import java.util.List;
+import java.util.Map;
 
 /**
+ * DataForSEO Serp查询 - 基于Redis同步
+ *
+ * <p>暂不使用DataForSEO callback实时更新方式,考虑稳定性
+ *
  * @author wfansh
  */
 @Slf4j
 @Service
 public class DataForSEOService {
 
+    private static final int MAX_TASKS_PER_SERP_REQUEST = 100;
+    private static final int SERP_REQUEST_CODE_SUCCESS = 20000;
+    private static final int SERP_TASK_CODE_SUCCESS = 20100;
+
     @Value("${dataforseo.username}")
     private String username;
 
@@ -43,14 +57,18 @@ public class DataForSEOService {
 
     @Autowired private ISeoKeywordsService seoKeywordsService;
 
+    @Autowired private ISeoKeywordsSerpService seoKeywordsSerpService;
+
+    @Autowired private RedisUtil redisUtil;
+
     private SerpApi serpApi;
 
     @PostConstruct
     private void init() {
         ApiClient defaultClient = io.github.dataforseo.client.Configuration.getDefaultApiClient();
         defaultClient.setBasePath(apiPath);
-        // HTTP超时 - 30秒
-        defaultClient.setConnectTimeout(30 * 1000);
+        // API超时 - 60秒
+        defaultClient.setConnectTimeout(60 * 1000);
 
         // API认证方式 - basicAuth
         HttpBasicAuth basicAuth = (HttpBasicAuth) defaultClient.getAuthentication("basicAuth");
@@ -61,50 +79,176 @@ public class DataForSEOService {
     }
 
     /**
-     * 从DataForSEO拉取keywords serp数据,同步到{@link SeoKeywordsSerp}表
+     * 全局查询更新DataForSEO keywords serp数据
      *
      * @param keywordType 1 - 指定词; 2 - 长尾词
+     * @param limit 最大查询条数
      */
-    public void syncKeywordsSerp(int keywordType, int limit) throws ApiException {
-        Date now = new Date();
-
-        // 1. 查询待更新keywords
-        List<SeoKeywords> seoKeywordsList =
-                seoKeywordsMapper.getKeywordsToSerp(keywordType).subList(0, limit);
-        if (ListUtil.isEmpty(seoKeywordsList)) {
-            log.info("没有待Serp查询的关键词");
-            return;
+    public void runKeywordsSerpTasks(int keywordType, int limit) {
+        List<SeoKeywords> seoKeywords = seoKeywordsMapper.getKeywordsToSerp(keywordType, limit);
+
+        if (ListUtil.isEmpty(seoKeywords)) {
+            log.info("暂无需要Serp查询的关键词");
+        } else {
+            // DataForSEO - each POST call containing no more than 100 tasks
+            // https://docs.dataforseo.com/v3/serp/google/organic/task_post/?bash
+            Lists.partition(seoKeywords, MAX_TASKS_PER_SERP_REQUEST).forEach(this::sendSerpRequest);
         }
+    }
+
+    /**
+     * Sends one batch of keywords to DataForSEO as Google organic SERP tasks.
+     *
+     * <p>1. Stores the id of every accepted task in Redis, keyed by keyword id
+     *
+     * <p>2. Updates the matching rows of the {@link SeoKeywords} table
+     *
+     * <p>An {@link ApiException} (including the one raised here for a non-success response code)
+     * is caught and logged, not rethrown.
+     *
+     * @param seoKeywords keywords to query; only the first {@code MAX_TASKS_PER_SERP_REQUEST}
+     *     entries are sent
+     */
+    private void sendSerpRequest(List<SeoKeywords> seoKeywords) {
+        try {
+            Date now = new Date();
+
+            // 1. Build the DataForSEO SERP request; each request holds at most 100 tasks
+            List<SerpTaskRequestInfo> serpTaskRequestInfoList = Lists.newArrayList();
+            for (SeoKeywords seoKeyword :
+                    seoKeywords.subList(
+                            0, Math.min(seoKeywords.size(), MAX_TASKS_PER_SERP_REQUEST))) {
+                SerpTaskRequestInfo serpTaskRequestInfo = new SerpTaskRequestInfo();
+                serpTaskRequestInfo.setKeyword(seoKeyword.getKeywords());
+                serpTaskRequestInfo.setSeDomain("google.com");
+                serpTaskRequestInfo.setLanguageCode("en");
+                serpTaskRequestInfo.setLocationCode(2840); // United States
+                serpTaskRequestInfo.setTag(Integer.toString(seoKeyword.getId())); // tag = keywordId
+                serpTaskRequestInfoList.add(serpTaskRequestInfo);
+            }
+
+            // 2. Send the DataForSEO SERP request, then log and validate the response
+            SerpGoogleOrganicTaskPostResponseInfo serpTaskPostResponseInfo =
+                    serpApi.googleOrganicTaskPost(serpTaskRequestInfoList);
+            log.info(
+                    "创建DataForSEO Serp任务,response = {}",
+                    JSONUtil.toJsonStr(serpTaskPostResponseInfo));
+            if (serpTaskPostResponseInfo.getStatusCode() != SERP_REQUEST_CODE_SUCCESS) {
+                // The whole request was rejected: log it and abort this batch via the catch below
+                log.error(serpTaskPostResponseInfo.getStatusMessage());
+                throw new ApiException(serpTaskPostResponseInfo.getStatusMessage());
+            }
+
+            // 3. Keep only the SERP tasks whose own status code is the success code
+            List<SerpGoogleOrganicTaskPostTaskInfo> serpingTasks =
+                    serpTaskPostResponseInfo.getTasks().stream()
+                            .filter(task -> task.getStatusCode() == SERP_TASK_CODE_SUCCESS)
+                            .toList();
+            List<Integer> serpingKeywordIds = Lists.newArrayList();
+
+            // 4. Record each in-progress SERP task in Redis (keyword id -> task id)
+            for (SerpGoogleOrganicTaskPostTaskInfo serpingTask : serpingTasks) {
+                // NOTE(review): assumes getData() is a String-to-String map that echoes the
+                // request fields, including the "tag" set in step 1 - confirm against the client
+                Map<String, String> data = (Map<String, String>) serpingTask.getData();
+                String keywordId = data.get("tag");
+                redisUtil.set(this.getSerpTaskRedisKey(keywordId), serpingTask.getId());
+
+                serpingKeywordIds.add(Integer.parseInt(keywordId));
+            }
+
+            // 5. Update the SeoKeywords table, but only for keywords whose task was accepted
+            List<SeoKeywords> serpingKeywords =
+                    seoKeywords.stream()
+                            .filter(keyword -> serpingKeywordIds.contains(keyword.getId()))
+                            .toList();
+            serpingKeywords.forEach(
+                    seoKeyword -> {
+                        // TODO: Why
+                        seoKeyword.setTimerLastSearchTime(DateUtil.getTodayZeroTime(now));
+                        // status -> search in progress
+                        seoKeyword.setSearchStatus(1);
+                    });
+            seoKeywordsService.updateBatchById(serpingKeywords);
+
+            log.info(
+                    "{}个关键词Serp查询任务创建完成 {}",
+                    serpingKeywords.size(),
+                    serpingKeywords.stream().map(SeoKeywords::getId).toList());
+        } catch (ApiException e) {
+            // API failures are logged only, so the caller can continue with its next batch
+            log.error(e.getMessage(), e);
+        }
+    }
 
-        // 2. 发送DataForSEO Serp查询请求
-        //        List<SerpGoogleOrganicLiveAdvancedRequestInfo> serpTasks =
-        //                seoKeywordsList.stream()
-        //                        .map(
-        //                                seoKeyword -> {
-        //                                    SerpGoogleOrganicLiveAdvancedRequestInfo serpTask =
-        //                                            new
-        // SerpGoogleOrganicLiveAdvancedRequestInfo();
-        //                                    serpTask.setKeyword(seoKeyword.getKeywords());
-        //                                    serpTask.setTag(Integer.toString(seoKeyword.getId()));
-        //                                    return serpTask;
-        //                                })
-        //                        .toList();
-        //
-        //        SerpGoogleOrganicLiveAdvancedResponseInfo serpResults =
-        //                serpApi.googleOrganicLiveAdvanced(serpTasks);
-
-        // 3. 更新SeoKeywords表
-        seoKeywordsList.forEach(
-                seoKeyword -> {
-                    // TODO: why?
-                    seoKeyword.setTimerLastSearchTime(DateUtil.getTodayZeroTime(now));
-                    // On search.
-                    seoKeyword.setSearchStatus(1);
-                });
-        seoKeywordsService.updateBatchById(seoKeywordsList);
-        log.info(
-                "{}个关键词serp查询任务创建完成 {}",
-                seoKeywordsList.size(),
-                seoKeywordsList.stream().map(SeoKeywords::getId).toList());
+    //    /** 处理Serp查询结果 */
+    //    public boolean onSerpResult(String taskId) throws Exception {
+    //        SerpGoogleOrganicTaskGetAdvancedTaskInfo serpTask =
+    //                serpApi.googleOrganicTaskGetAdvanced(taskId).getTasks().get(0);
+    //        Map<String, String> data = (Map<String, String>) serpTask.getData();
+    //        SerpGoogleOrganicTaskGetAdvancedResultInfo serpResult = serpTask.getResult().get(0);
+    //
+    //        int keywordId = Integer.parseInt(data.get("tag"));
+    //        String seDomain = serpResult.getSeDomain();
+    //        String checkUrl = serpResult.getCheckUrl();
+    //        Date seDatetime =
+    //                DateUtil.parseDate(serpResult.getDatetime(),
+    // DateUtil.ZONED_DATE_TIME_PATTERN);
+    //
+    //        // 1. 查询keyword
+    //        SeoKeywords seoKeyword = seoKeywordsService.getById(keywordId);
+    //        if (Objects.isNull(seoKeyword)) {
+    //            log.info("无法获取关键词 id = {}", keywordId);
+    //            return false;
+    //        }
+    //        String topPrivateDomain = CommonUtil.getTopPrivateDomain(seoKeyword.getDomain()); //
+    // 顶级域名
+    //        OrganicSerpElementItem serpItem =
+    //                serpResult.getItems().stream()
+    //                        .map(item -> (OrganicSerpElementItem) item)
+    //                        .filter(item -> item.getType().equalsIgnoreCase("organic"))
+    //                        .filter(item -> item.getDomain().contains(topPrivateDomain))
+    //                        .findAny()
+    //                        .orElse(null);
+    //
+    //        // 2. 更新SeoKeywords表
+    //        UpdateWrapper<SeoKeywords> seoKeywordsUpdateWrapper = new UpdateWrapper<>();
+    //        seoKeywordsUpdateWrapper.eq("id", keywordId);
+    //        // TODO: why?
+    //        seoKeywordsUpdateWrapper.set("last_search_time", seDatetime);
+    //        seoKeywordsUpdateWrapper.set("last_rank", 0);
+    //        // 查询结束
+    //        seoKeywordsUpdateWrapper.set("search_status", 0);
+    //        if (Objects.nonNull(serpItem)) {
+    //            seoKeywordsUpdateWrapper.set(
+    //                    "position_url", StringUtils.removeEnd(serpItem.getUrl(), "/"));
+    //            seoKeywordsUpdateWrapper.set("last_rank", serpItem.getRankGroup());
+    //        }
+    //        seoKeywordsService.update(seoKeywordsUpdateWrapper);
+    //
+    //        // 3. 更新SeoSerp表
+    //        // 3.1 补充与上次更新时间之间的数据
+    //        seoKeywordsSerpService.fillKeywordsSerpHistory(keywordId, seDatetime);
+    //        // 3.2 更新Serp表
+    //        SeoKeywordsSerp seoKeywordsSerp =
+    //                seoKeywordsSerpService
+    //                        .list(
+    //                                new LambdaQueryWrapper<SeoKeywordsSerp>()
+    //                                        .eq(SeoKeywordsSerp::getKeywordsId, keywordId)
+    //                                        .eq(
+    //                                                SeoKeywordsSerp::getSeDate,
+    //                                                DateUtil.formatDate(
+    //                                                        seDatetime, DateUtil.DATE_PATTERN)))
+    //                        .stream()
+    //                        .findFirst()
+    //                        .orElse(new SeoKeywordsSerp());
+    //
+    //        seoKeywordsSerp.setKeywordsId(keywordsId);
+    //        seoKeywordsSerp.setSearchUrl(checkUrl);
+    //        seoKeywordsSerp.setSeDomain(seDomain);
+    //        seoKeywordsSerp.setLanguageCode(seoKeywords.getLang());
+    //        seoKeywordsSerp.setType("organic_results");
+    //        seoKeywordsSerp.setRankAbsolute(rank);
+    //        seoKeywordsSerp.setPageNumber(rank / PAGE_SIZE + 1);
+    //        seoKeywordsSerp.setRankType(rank);
+    //        seoKeywordsSerp.setSeDate(DateUtil.formatDate(seDatetime, DateUtil.DATE_PATTERN));
+    //        seoKeywordsSerp.setSeDatetime(seDatetime);
+    //
+    //        seoKeywordsSerpService.saveOrUpdate(seoKeywordsSerp);
+    //        return true;
+    //    }
+
+    /**
+     * Builds the Redis key for a keyword's SERP task entry.
+     *
+     * @param keywordId keyword id in string form
+     * @return the key {@code serp_task:<keywordId>}
+     */
+    private String getSerpTaskRedisKey(String keywordId) {
+        return "serp_task:%s".formatted(keywordId);
     }
 }

+ 4 - 3
jeecg-module-system/jeecg-system-start/src/test/java/org/jeecg/modules/adweb/seo/service/DataForSEOTest.java

@@ -1,8 +1,8 @@
 package org.jeecg.modules.adweb.seo.service;
 
-
 import org.jeecg.modules.adweb.common.constant.AdwebConstant;
 import org.jeecg.modules.adweb.seo.service.dataforseo.DataForSEOService;
+import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.boot.test.context.SpringBootTest;
@@ -15,7 +15,8 @@ public class DataForSEOTest {
     @Autowired private DataForSEOService dataForSEOService;
 
     @Test
-    public void testSerpGoogleOrganic() throws Exception {
-        dataForSEOService.syncKeywordsSerp(AdwebConstant.KEYWORD_TYPE_APPOINT, 3);
+    @Disabled
+    public void testRunKeywordsSerpTasks() throws Exception {
+        dataForSEOService.runKeywordsSerpTasks(AdwebConstant.KEYWORD_TYPE_APPOINT, 10);
     }
 }