抓取视频播放源地址
This commit is contained in:
parent
66136b5679
commit
d940ec6cc8
25
pom.xml
25
pom.xml
@ -30,7 +30,7 @@
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-web</artifactId>
|
||||
<artifactId>spring-boot-starter</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
@ -39,6 +39,29 @@
|
||||
<version>3.5.7</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.postgresql</groupId>
|
||||
<artifactId>postgresql</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>httpclient</artifactId>
|
||||
<version>4.5.14</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.jsoup</groupId>
|
||||
<artifactId>jsoup</artifactId>
|
||||
<version>1.15.3</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.alibaba.fastjson2</groupId>
|
||||
<artifactId>fastjson2</artifactId>
|
||||
<version>2.0.51</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
|
||||
|
||||
@ -0,0 +1,15 @@
|
||||
package top.yuchat.crawler.video.models.entity;
|
||||
|
||||
import lombok.Data;
|
||||
|
||||
/**
 * One crawlable video category.
 *
 * <p>Lombok's {@code @Data} generates the accessors used by {@code MadouVideoService}.
 */
@Data
|
||||
public class ClassifyInfo {
|
||||
|
||||
/** Category id; substituted into the {@code {id}} placeholder of the listing URL template. */
private String id;
|
||||
|
||||
/** Category name; logged during the crawl and stored on each saved video as its classify value. */
private String name;
|
||||
|
||||
/** Not read anywhere in the visible code; presumably the category's own page URL (TODO confirm). */
private String url;
|
||||
|
||||
/** Upper bound of the page loop: listing pages 1 through this value are crawled. */
private Integer pageSize;
|
||||
}
|
||||
@ -18,4 +18,6 @@ public class MadouVideoInfo {
|
||||
|
||||
private String m3u8Url;
|
||||
|
||||
private Boolean m3u8;
|
||||
|
||||
}
|
||||
|
||||
@ -0,0 +1,7 @@
|
||||
package top.yuchat.crawler.video.models.mapper;
|
||||
|
||||
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
|
||||
import top.yuchat.crawler.video.models.entity.ClassifyInfo;
|
||||
|
||||
/**
 * MyBatis-Plus mapper for {@link ClassifyInfo}.
 *
 * <p>Declares no methods of its own; everything available comes from {@link BaseMapper}.
 */
public interface ClassifyInfoMapper extends BaseMapper<ClassifyInfo> {
|
||||
}
|
||||
@ -0,0 +1,10 @@
|
||||
package top.yuchat.crawler.video.models.service;
|
||||
|
||||
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
|
||||
import org.springframework.stereotype.Service;
|
||||
import top.yuchat.crawler.video.models.entity.ClassifyInfo;
|
||||
import top.yuchat.crawler.video.models.mapper.ClassifyInfoMapper;
|
||||
|
||||
/**
 * Spring service for {@link ClassifyInfo} records.
 *
 * <p>Adds nothing to the operations inherited from {@link ServiceImpl}.
 */
@Service
|
||||
public class ClassifyInfoService extends ServiceImpl<ClassifyInfoMapper, ClassifyInfo> {
|
||||
}
|
||||
@ -1,11 +1,28 @@
|
||||
package top.yuchat.crawler.video.models.service;
|
||||
|
||||
import com.alibaba.fastjson2.JSON;
|
||||
import com.alibaba.fastjson2.JSONObject;
|
||||
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
|
||||
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.springframework.stereotype.Service;
|
||||
import top.yuchat.crawler.video.utils.HttpUtils;
|
||||
import top.yuchat.crawler.video.models.entity.ClassifyInfo;
|
||||
import top.yuchat.crawler.video.models.entity.MadouVideoInfo;
|
||||
import top.yuchat.crawler.video.models.mapper.MadouVideoMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoInfo> {
|
||||
|
||||
/**
|
||||
@ -13,9 +30,90 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
|
||||
* 1-10
|
||||
* 20-27
|
||||
* 29-30
|
||||
*
|
||||
* <p>
|
||||
* /index.php/vod/type/id/1/page/2.html
|
||||
*
|
||||
* <p>
|
||||
* https://yutujx.com/?url=https://t20a.cdn2020.com/video/m3u8/2022/10/23/cc234c9c/index.m3u8
|
||||
*/
|
||||
|
||||
public static final String BASE_URL = "https://madou.io";
|
||||
public static final String VIDEO_PACKAGE = BASE_URL + "/index.php/vod/type/id/{id}/page/{page_size}.html";
|
||||
|
||||
private final ClassifyInfoService classifyInfoService;
|
||||
|
||||
|
||||
public void getVideoList() {
|
||||
List<ClassifyInfo> list = classifyInfoService.list();
|
||||
int count = 0;
|
||||
|
||||
for (ClassifyInfo classifyInfo : list) {
|
||||
log.info("开始处理分类:{}", classifyInfo.getName());
|
||||
for (int i = 1; i <= classifyInfo.getPageSize(); i++) {
|
||||
String url = VIDEO_PACKAGE.replace("{id}", classifyInfo.getId()).replace("{page_size}", String.valueOf(i));
|
||||
log.info("正在处理第{}页,页面地址:{}", i, url);
|
||||
try {
|
||||
saveVideo(url, classifyInfo.getName());
|
||||
count++;
|
||||
} catch (Exception e) {
|
||||
log.error("发送请求失败,跳过该页数据的爬取,URL:{}, 错误信息:{}", url, e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
log.info("数据处理完成,共处理 {} 页数据", count);
|
||||
}
|
||||
|
||||
|
||||
public void saveVideo(String url, String classify) throws IOException {
|
||||
String html = HttpUtils.get(url);
|
||||
Document document = Jsoup.parse(html);
|
||||
|
||||
Elements imgPs = document.getElementsByClass("img");
|
||||
for (Element imgP : imgPs) {
|
||||
Element img = imgP.child(0);
|
||||
|
||||
String title = img.attr("title");
|
||||
String coverUrl = img.attr("src");
|
||||
Element a = imgP.child(1);
|
||||
String m3u8Url = a.attr("href");
|
||||
log.info("分类:{},标题:{},封面:{},播放地址:{}", classify, title, coverUrl, m3u8Url);
|
||||
|
||||
MadouVideoInfo madouVideoInfo = new MadouVideoInfo();
|
||||
madouVideoInfo.setClassify(classify);
|
||||
madouVideoInfo.setTitle(title);
|
||||
madouVideoInfo.setCoverUrl(coverUrl);
|
||||
madouVideoInfo.setM3u8Url(m3u8Url);
|
||||
save(madouVideoInfo);
|
||||
}
|
||||
}
|
||||
|
||||
@PostConstruct
|
||||
public void processingData() {
|
||||
QueryWrapper<MadouVideoInfo> wrapper = new QueryWrapper<>();
|
||||
wrapper.eq("m3u8", false);
|
||||
List<MadouVideoInfo> list = list(wrapper);
|
||||
log.info("开始处理数据,数据总量: {}", list.size());
|
||||
|
||||
for (MadouVideoInfo madouVideoInfo : list) {
|
||||
String url = BASE_URL + madouVideoInfo.getM3u8Url();
|
||||
try {
|
||||
String html = HttpUtils.get(url);
|
||||
|
||||
Document document = Jsoup.parse(html);
|
||||
Element bofangBox = document.getElementById("bofang_box");
|
||||
Element script = bofangBox.child(0);
|
||||
String scriptStr = script.html();
|
||||
JSONObject videoInfo = JSONObject.parseObject(scriptStr.substring(scriptStr.indexOf("=") + 1));
|
||||
String m3u8Url = videoInfo.getString("url");
|
||||
log.info("标题: {}, 播放地址:{}", madouVideoInfo.getTitle(), m3u8Url);
|
||||
madouVideoInfo.setM3u8Url(m3u8Url);
|
||||
madouVideoInfo.setM3u8(true);
|
||||
updateById(madouVideoInfo);
|
||||
} catch (Exception e) {
|
||||
log.error("处理失败,失败信息:{}", e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
29
src/main/java/top/yuchat/crawler/video/utils/HttpUtils.java
Normal file
29
src/main/java/top/yuchat/crawler/video/utils/HttpUtils.java
Normal file
@ -0,0 +1,29 @@
|
||||
package top.yuchat.crawler.video.utils;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.HttpStatus;
|
||||
import org.apache.http.client.HttpClient;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.DefaultHttpClient;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@Slf4j
|
||||
public class HttpUtils {
|
||||
private static final HttpClient HTTP_CLIENT = new DefaultHttpClient();
|
||||
|
||||
public static String get(String url) throws IOException {
|
||||
HttpGet httpGet = new HttpGet(url);
|
||||
CloseableHttpResponse response = (CloseableHttpResponse) HTTP_CLIENT.execute(httpGet);
|
||||
int statusCode = response.getStatusLine().getStatusCode();
|
||||
if (HttpStatus.SC_OK != statusCode) {
|
||||
throw new RuntimeException("请求失败,状态码:" + statusCode + ",请求地址:" + url);
|
||||
}
|
||||
HttpEntity entity = response.getEntity();
|
||||
return EntityUtils.toString(entity);
|
||||
}
|
||||
|
||||
}
|
||||
6
src/main/resources/application.yml
Normal file
6
src/main/resources/application.yml
Normal file
@ -0,0 +1,6 @@
|
||||
spring:
|
||||
datasource:
|
||||
driver-class-name: org.postgresql.Driver
|
||||
url: jdbc:postgresql://pgsql.yuchat.top:5432/postgres
|
||||
username: postgres
|
||||
password: longqi@1314
|
||||
Loading…
x
Reference in New Issue
Block a user