* /index.php/vod/type/id/1/page/2.html
- *
+ *
* https://yutujx.com/?url=https://t20a.cdn2020.com/video/m3u8/2022/10/23/cc234c9c/index.m3u8
*/
+
+ /** Site root; {@code processingData()} prepends it to the page path stored on each record. */
+ public static final String BASE_URL = "https://madou.io";
+ /**
+ * Listing-page URL template. {@code getVideoList()} replaces {@code {id}} with the category id
+ * and {@code {page_size}} with the 1-based page number (despite the placeholder's name).
+ */
+ public static final String VIDEO_PACKAGE = BASE_URL + "/index.php/vod/type/id/{id}/page/{page_size}.html";
+
+ /** Supplies the categories that {@code getVideoList()} iterates over. */
+ private final ClassifyInfoService classifyInfoService;
+
+
+    /**
+     * Crawls every listing page of every category and stores the videos found on each page.
+     *
+     * <p>Categories are loaded from {@code classifyInfoService.list()}. For each category the
+     * pages {@code 1..getPageSize()} are visited by filling in the {@code VIDEO_PACKAGE}
+     * template, and each page URL is handed to {@code saveVideo}. A page that throws is logged
+     * (with its stack trace) and skipped so the remaining pages are still processed.
+     */
+    public void getVideoList() {
+        // Typed list: a raw List cannot be iterated as ClassifyInfo.
+        List<ClassifyInfo> categories = classifyInfoService.list();
+        int processedPages = 0;
+
+        for (ClassifyInfo classifyInfo : categories) {
+            log.info("开始处理分类:{}", classifyInfo.getName());
+            for (int page = 1; page <= classifyInfo.getPageSize(); page++) {
+                // "{page_size}" is the template's placeholder name; the value is the page number.
+                String url = VIDEO_PACKAGE
+                        .replace("{id}", classifyInfo.getId())
+                        .replace("{page_size}", String.valueOf(page));
+                log.info("正在处理第{}页,页面地址:{}", page, url);
+                try {
+                    saveVideo(url, classifyInfo.getName());
+                    processedPages++;
+                } catch (Exception e) {
+                    // Trailing throwable makes SLF4J print the stack trace as well as the message.
+                    log.error("发送请求失败,跳过该页数据的爬取,URL:{}, 错误信息:{}", url, e.getMessage(), e);
+                }
+            }
+        }
+        // Only pages that completed without an exception are counted.
+        log.info("数据处理完成,共处理 {} 页数据", processedPages);
+    }
+
+
+    /**
+     * Downloads one listing page and persists every video card found on it.
+     *
+     * <p>Each element with CSS class {@code img} is treated as one card: its first child supplies
+     * the title ({@code title} attribute) and the cover ({@code src} attribute), its second child
+     * supplies the link ({@code href} attribute). The {@code href} is stored in the record's
+     * {@code m3u8Url} field exactly as scraped. Cards with fewer than two children are skipped
+     * with a warning instead of aborting the rest of the page.
+     *
+     * @param url      URL of the listing page to fetch
+     * @param classify category name stored on every saved record
+     * @throws IOException if the page cannot be fetched
+     */
+    public void saveVideo(String url, String classify) throws IOException {
+        String html = HttpUtils.get(url);
+        Document document = Jsoup.parse(html);
+
+        Elements cards = document.getElementsByClass("img");
+        for (Element card : cards) {
+            Elements parts = card.children();
+            if (parts.size() < 2) {
+                // child(0)/child(1) would throw here and drop every remaining card on the page.
+                log.warn("页面结构异常,跳过该条数据,页面地址:{}", url);
+                continue;
+            }
+            Element img = parts.get(0);
+            Element a = parts.get(1);
+
+            String title = img.attr("title");
+            String coverUrl = img.attr("src");
+            String m3u8Url = a.attr("href");
+            log.info("分类:{},标题:{},封面:{},播放地址:{}", classify, title, coverUrl, m3u8Url);
+
+            MadouVideoInfo madouVideoInfo = new MadouVideoInfo();
+            madouVideoInfo.setClassify(classify);
+            madouVideoInfo.setTitle(title);
+            madouVideoInfo.setCoverUrl(coverUrl);
+            madouVideoInfo.setM3u8Url(m3u8Url);
+            save(madouVideoInfo);
+        }
+    }
+
+    /**
+     * Resolves the real stream address for every record whose {@code m3u8} column is
+     * {@code false}.
+     *
+     * <p>For each such record the stored {@code m3u8Url} is appended to {@code BASE_URL} and the
+     * resulting page is fetched. The first child of the element with id {@code bofang_box} is
+     * read as a script; everything after its first {@code =} is parsed as JSON and the
+     * {@code url} key is taken as the stream address. On success the record's {@code m3u8Url} is
+     * overwritten and {@code m3u8} is set to {@code true}. A record whose page lacks the player
+     * element or the {@code url} key is left untouched so it can be retried later.
+     *
+     * <p>NOTE(review): because of {@code @PostConstruct} this runs synchronously during bean
+     * initialisation, so it performs one HTTP request per pending record before startup
+     * completes.
+     */
+    @PostConstruct
+    public void processingData() {
+        QueryWrapper<MadouVideoInfo> wrapper = new QueryWrapper<>();
+        wrapper.eq("m3u8", false);
+        List<MadouVideoInfo> pending = list(wrapper);
+        log.info("开始处理数据,数据总量: {}", pending.size());
+
+        for (MadouVideoInfo madouVideoInfo : pending) {
+            // Before processing, m3u8Url holds the href scraped from the listing page.
+            String url = BASE_URL + madouVideoInfo.getM3u8Url();
+            try {
+                String html = HttpUtils.get(url);
+
+                Document document = Jsoup.parse(html);
+                Element bofangBox = document.getElementById("bofang_box");
+                if (bofangBox == null || bofangBox.children().isEmpty()) {
+                    // Previously an NPE / IndexOutOfBounds logged only as "null".
+                    log.warn("未找到播放器节点 bofang_box,跳过,页面地址:{}", url);
+                    continue;
+                }
+                String scriptStr = bofangBox.child(0).html();
+                // The script is expected to look like "<name>=<json>"; keep the part after '='.
+                JSONObject videoInfo = JSONObject.parseObject(scriptStr.substring(scriptStr.indexOf("=") + 1));
+                String m3u8Url = videoInfo == null ? null : videoInfo.getString("url");
+                if (m3u8Url == null || m3u8Url.isEmpty()) {
+                    // Do not overwrite the stored page path or mark the record as done.
+                    log.warn("未解析到播放地址,跳过,页面地址:{}", url);
+                    continue;
+                }
+                log.info("标题: {}, 播放地址:{}", madouVideoInfo.getTitle(), m3u8Url);
+                madouVideoInfo.setM3u8Url(m3u8Url);
+                madouVideoInfo.setM3u8(true);
+                updateById(madouVideoInfo);
+            } catch (Exception e) {
+                // Trailing throwable keeps the stack trace in the log.
+                log.error("处理失败,失败信息:{}", e.getMessage(), e);
+            }
+        }
+    }
}
diff --git a/src/main/java/top/yuchat/crawler/video/utils/HttpUtils.java b/src/main/java/top/yuchat/crawler/video/utils/HttpUtils.java
new file mode 100644
index 0000000..7ea0fdd
--- /dev/null
+++ b/src/main/java/top/yuchat/crawler/video/utils/HttpUtils.java
@@ -0,0 +1,29 @@
+package top.yuchat.crawler.video.utils;
+
+import lombok.extern.slf4j.Slf4j;
+import org.apache.http.HttpEntity;
+import org.apache.http.HttpStatus;
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.config.RequestConfig;
+import org.apache.http.client.methods.CloseableHttpResponse;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.http.impl.client.HttpClients;
+import org.apache.http.util.EntityUtils;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+
+@Slf4j
+public class HttpUtils {
+ private static final HttpClient HTTP_CLIENT = new DefaultHttpClient();
+
+ public static String get(String url) throws IOException {
+ HttpGet httpGet = new HttpGet(url);
+ CloseableHttpResponse response = (CloseableHttpResponse) HTTP_CLIENT.execute(httpGet);
+ int statusCode = response.getStatusLine().getStatusCode();
+ if (HttpStatus.SC_OK != statusCode) {
+ throw new RuntimeException("请求失败,状态码:" + statusCode + ",请求地址:" + url);
+ }
+ HttpEntity entity = response.getEntity();
+ return EntityUtils.toString(entity);
+ }
+
+}
diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml
new file mode 100644
index 0000000..c4d96da
--- /dev/null
+++ b/src/main/resources/application.yml
@@ -0,0 +1,6 @@
+# Credentials are read from the environment so that no secret is committed to the repository.
+# DB_PASSWORD is required at startup; DB_USERNAME falls back to "postgres" when unset.
+spring:
+  datasource:
+    driver-class-name: org.postgresql.Driver
+    url: jdbc:postgresql://pgsql.yuchat.top:5432/postgres
+    username: ${DB_USERNAME:postgres}
+    password: ${DB_PASSWORD}
\ No newline at end of file