抓取视频播放源地址

This commit is contained in:
燕陇琪 2024-10-14 01:29:48 +08:00
parent 66136b5679
commit d940ec6cc8
8 changed files with 193 additions and 3 deletions

25
pom.xml
View File

@ -30,7 +30,7 @@
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<dependency>
@ -39,6 +39,29 @@
<version>3.5.7</version>
</dependency>
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient</artifactId>
<version>4.5.14</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.3</version>
</dependency>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2</artifactId>
<version>2.0.51</version>
</dependency>
</dependencies>

View File

@ -0,0 +1,15 @@
package top.yuchat.crawler.video.models.entity;
import lombok.Data;
/**
 * Entity describing one video category ("classify") that the crawler iterates over.
 *
 * <p>Lombok's {@code @Data} generates the getters, setters, {@code equals},
 * {@code hashCode} and {@code toString} for the fields below.
 */
@Data
public class ClassifyInfo {
/** Category identifier; the crawler substitutes it into the listing URL template. */
private String id;
/** Category name; used in log output and stored on each video found in this category. */
private String name;
/** URL associated with the category. NOTE(review): not read by the crawler code visible in this change; confirm its intended use. */
private String url;
/** Number of listing pages in this category; the crawler fetches pages 1 through this value (inclusive). */
private Integer pageSize;
}

View File

@ -18,4 +18,6 @@ public class MadouVideoInfo {
private String m3u8Url;
private Boolean m3u8;
}

View File

@ -0,0 +1,7 @@
package top.yuchat.crawler.video.models.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import top.yuchat.crawler.video.models.entity.ClassifyInfo;
/**
 * MyBatis-Plus mapper for {@link ClassifyInfo}.
 *
 * <p>Declares no methods of its own; everything it offers is inherited from {@link BaseMapper}.
 */
public interface ClassifyInfoMapper extends BaseMapper<ClassifyInfo> {
}

View File

@ -0,0 +1,10 @@
package top.yuchat.crawler.video.models.service;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import org.springframework.stereotype.Service;
import top.yuchat.crawler.video.models.entity.ClassifyInfo;
import top.yuchat.crawler.video.models.mapper.ClassifyInfoMapper;
/**
 * Spring-managed service for {@link ClassifyInfo} records.
 *
 * <p>Adds no logic of its own; all operations are inherited from the MyBatis-Plus
 * {@link ServiceImpl} bound to {@link ClassifyInfoMapper}.
 */
@Service
public class ClassifyInfoService extends ServiceImpl<ClassifyInfoMapper, ClassifyInfo> {
}

View File

@ -1,11 +1,28 @@
package top.yuchat.crawler.video.models.service;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONObject;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.stereotype.Service;
import top.yuchat.crawler.video.utils.HttpUtils;
import top.yuchat.crawler.video.models.entity.ClassifyInfo;
import top.yuchat.crawler.video.models.entity.MadouVideoInfo;
import top.yuchat.crawler.video.models.mapper.MadouVideoMapper;
import java.io.IOException;
import java.util.List;
@Slf4j
@Service
@RequiredArgsConstructor
public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoInfo> {
/**
@ -13,9 +30,90 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
* 1-10
* 20-27
* 29-30
*
* <p>
* /index.php/vod/type/id/1/page/2.html
*
* <p>
* https://yutujx.com/?url=https://t20a.cdn2020.com/video/m3u8/2022/10/23/cc234c9c/index.m3u8
*/
public static final String BASE_URL = "https://madou.io"; // site root; relative links scraped from listing pages are appended to it
/**
 * Template for a category listing page. {@code {id}} is replaced with the category id and
 * {@code {page_size}} with the 1-based number of the page being fetched (despite its name,
 * it is the page index, not a page size).
 */
public static final String VIDEO_PACKAGE = BASE_URL + "/index.php/vod/type/id/{id}/page/{page_size}.html";
/** Supplies the categories to crawl; injected through the Lombok-generated constructor. */
private final ClassifyInfoService classifyInfoService;
/**
 * Crawls every listing page of every category and stores the videos found on each page.
 *
 * <p>Categories are loaded from {@link ClassifyInfoService}. For each category, pages
 * {@code 1..pageSize} are fetched through {@link #saveVideo(String, String)}. A page that
 * fails is logged and skipped so that one bad page does not stop the crawl; a category with
 * a missing id or page count is skipped as a whole for the same reason.
 */
public void getVideoList() {
    List<ClassifyInfo> list = classifyInfoService.list();
    int count = 0;
    for (ClassifyInfo classifyInfo : list) {
        log.info("开始处理分类:{}", classifyInfo.getName());
        String id = classifyInfo.getId();
        Integer pageSize = classifyInfo.getPageSize();
        if (id == null || pageSize == null) {
            // Unboxing a null pageSize (or substituting a null id) would throw outside the
            // per-page try/catch below and abort the crawl of all remaining categories.
            log.warn("分类信息不完整,跳过该分类:{}", classifyInfo.getName());
            continue;
        }
        for (int i = 1; i <= pageSize; i++) {
            String url = VIDEO_PACKAGE.replace("{id}", id).replace("{page_size}", String.valueOf(i));
            log.info("正在处理第{}页,页面地址:{}", i, url);
            try {
                saveVideo(url, classifyInfo.getName());
                count++;
            } catch (Exception e) {
                // The exception is passed as the trailing argument so the stack trace is logged
                // too; the message alone is often empty (e.g. for a NullPointerException).
                log.error("发送请求失败跳过该页数据的爬取URL{}, 错误信息:{}", url, e.getMessage(), e);
            }
        }
    }
    log.info("数据处理完成,共处理 {} 页数据", count);
}
/**
 * Downloads one listing page and saves a {@link MadouVideoInfo} row for every entry on it.
 *
 * <p>Each element with CSS class {@code img} is expected to contain the cover image as its
 * first child and the link to the video's detail page as its second child. The link's
 * {@code href} is stored in {@code m3u8Url} as-is; {@link #processingData()} later replaces
 * it with the real stream address.
 *
 * @param url      absolute URL of the listing page to fetch
 * @param classify category name stored on every saved row
 * @throws IOException if the page cannot be downloaded
 */
public void saveVideo(String url, String classify) throws IOException {
    String html = HttpUtils.get(url);
    Document document = Jsoup.parse(html);
    Elements imgPs = document.getElementsByClass("img");
    for (Element imgP : imgPs) {
        if (imgP.children().size() < 2) {
            // child(0)/child(1) would throw on an unexpectedly shaped element and abort the
            // remaining entries of the page, so skip just this one.
            log.warn("页面元素结构异常,跳过该条数据,页面地址:{}", url);
            continue;
        }
        Element img = imgP.child(0);
        String title = img.attr("title");
        String coverUrl = img.attr("src");

        Element a = imgP.child(1);
        String m3u8Url = a.attr("href");

        log.info("分类:{},标题:{},封面:{},播放地址:{}", classify, title, coverUrl, m3u8Url);

        MadouVideoInfo madouVideoInfo = new MadouVideoInfo();
        madouVideoInfo.setClassify(classify);
        madouVideoInfo.setTitle(title);
        madouVideoInfo.setCoverUrl(coverUrl);
        madouVideoInfo.setM3u8Url(m3u8Url);
        // processingData() selects rows with m3u8 = false. Set the flag explicitly so the row
        // is picked up even if the column has no database default (a NULL never matches).
        // NOTE(review): table schema is not visible here; confirm whether a default exists.
        madouVideoInfo.setM3u8(false);
        save(madouVideoInfo);
    }
}
/**
 * Resolves the real stream address for every stored video that is still unresolved.
 *
 * <p>Selects all rows with {@code m3u8 = false}, downloads each video's detail page
 * ({@link #BASE_URL} + the stored relative link), extracts the stream URL from the page and
 * writes it back with {@code m3u8 = true}. A record that cannot be resolved is logged and left
 * untouched so it is retried on the next run.
 *
 * <p>Because of {@code @PostConstruct}, this runs once during bean initialization.
 */
@PostConstruct
public void processingData() {
    QueryWrapper<MadouVideoInfo> wrapper = new QueryWrapper<>();
    wrapper.eq("m3u8", false);
    List<MadouVideoInfo> list = list(wrapper);
    log.info("开始处理数据,数据总量: {}", list.size());

    for (MadouVideoInfo madouVideoInfo : list) {
        String detailPath = madouVideoInfo.getM3u8Url();
        if (detailPath == null || detailPath.isBlank()) {
            // Without a detail link the request would go to BASE_URL + "null" or the home page.
            log.warn("缺少详情页地址,跳过,标题: {}", madouVideoInfo.getTitle());
            continue;
        }
        String url = BASE_URL + detailPath;
        try {
            String m3u8Url = extractPlayUrl(HttpUtils.get(url));
            if (m3u8Url == null || m3u8Url.isBlank()) {
                // Do not flag the row as resolved when no stream address was found; otherwise
                // it would keep its relative link forever and never be retried.
                log.warn("未解析到播放地址,跳过,标题: {} 页面地址:{}", madouVideoInfo.getTitle(), url);
                continue;
            }
            log.info("标题: {} 播放地址:{}", madouVideoInfo.getTitle(), m3u8Url);
            madouVideoInfo.setM3u8Url(m3u8Url);
            madouVideoInfo.setM3u8(true);
            updateById(madouVideoInfo);
        } catch (Exception e) {
            // Include the page and the exception itself so the failing record can be identified.
            log.error("处理失败,页面地址:{},失败信息:{}", url, e.getMessage(), e);
        }
    }
}

/**
 * Extracts the stream URL from a video detail page.
 *
 * <p>The page is expected to contain an element with id {@code bofang_box} whose first child is
 * a script of the form {@code name = {json}}; the JSON object's {@code url} field is returned.
 *
 * @param html detail page markup
 * @return the stream URL, or {@code null} when the player box or the {@code url} field is absent
 */
private static String extractPlayUrl(String html) {
    Document document = Jsoup.parse(html);
    Element bofangBox = document.getElementById("bofang_box");
    if (bofangBox == null || bofangBox.children().isEmpty()) {
        return null;
    }
    String scriptStr = bofangBox.child(0).html();
    // Everything after the first '=' is the JSON payload assigned in the script.
    JSONObject videoInfo = JSONObject.parseObject(scriptStr.substring(scriptStr.indexOf("=") + 1));
    return videoInfo == null ? null : videoInfo.getString("url");
}
}

View File

@ -0,0 +1,29 @@
package top.yuchat.crawler.video.utils;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpEntity;
import org.apache.http.HttpStatus;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
/**
 * Minimal blocking HTTP helper built on Apache HttpClient 4.x.
 */
@Slf4j
public class HttpUtils {

    // NOTE(review): DefaultHttpClient is deprecated in HttpClient 4.3+; it is kept here so the
    // client's configuration and behavior stay exactly as before.
    private static final HttpClient HTTP_CLIENT = new DefaultHttpClient();

    /**
     * Performs a GET request and returns the response body as text.
     *
     * <p>The body is decoded with the charset declared in the response's Content-Type header,
     * falling back to UTF-8 when none is declared.
     *
     * @param url absolute URL to fetch
     * @return the response body
     * @throws IOException      if the request fails at the transport level or the response has no body
     * @throws RuntimeException if the server answers with a status other than 200
     */
    public static String get(String url) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        // try-with-resources closes the response on every path, including the non-200 throw
        // below, so the underlying connection is always released back to the client.
        try (CloseableHttpResponse response = (CloseableHttpResponse) HTTP_CLIENT.execute(httpGet)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (HttpStatus.SC_OK != statusCode) {
                throw new RuntimeException("请求失败,状态码:" + statusCode + ",请求地址:" + url);
            }
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("响应内容为空,请求地址:" + url);
            }
            // Without an explicit default, HttpClient falls back to ISO-8859-1 when the server
            // declares no charset, which garbles non-Latin page content.
            return EntityUtils.toString(entity, java.nio.charset.StandardCharsets.UTF_8);
        }
    }
}

View File

@ -0,0 +1,6 @@
# Datasource settings. Connection details are read from environment variables so that no
# secret lives in version control. DB_PASSWORD deliberately has no default: the application
# fails fast at startup when it is not provided.
# NOTE(review): the password previously committed in this file is in the repository history
# and should be rotated.
spring:
  datasource:
    driver-class-name: org.postgresql.Driver
    url: ${DB_URL:jdbc:postgresql://pgsql.yuchat.top:5432/postgres}
    username: ${DB_USERNAME:postgres}
    password: ${DB_PASSWORD}