From edb308a495faab95d25f6708e948985292413414 Mon Sep 17 00:00:00 2001 From: yanlongqi Date: Fri, 18 Oct 2024 22:23:05 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=B8=8B=E8=BD=BD=E7=BA=BF?= =?UTF-8?q?=E7=A8=8B=EF=BC=8C=E6=8F=90=E9=AB=98=E4=B8=8B=E8=BD=BD=E9=80=9F?= =?UTF-8?q?=E5=BA=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../models/service/MadouVideoService.java | 18 +-- .../video/utils/HttpRequestComponent.java | 113 ++++++++++++++++++ .../yuchat/crawler/video/utils/HttpUtils.java | 53 -------- 3 files changed, 122 insertions(+), 62 deletions(-) create mode 100644 src/main/java/top/yuchat/crawler/video/utils/HttpRequestComponent.java delete mode 100644 src/main/java/top/yuchat/crawler/video/utils/HttpUtils.java diff --git a/src/main/java/top/yuchat/crawler/video/models/service/MadouVideoService.java b/src/main/java/top/yuchat/crawler/video/models/service/MadouVideoService.java index c9d0fba..23dcd58 100644 --- a/src/main/java/top/yuchat/crawler/video/models/service/MadouVideoService.java +++ b/src/main/java/top/yuchat/crawler/video/models/service/MadouVideoService.java @@ -12,10 +12,10 @@ import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; -import top.yuchat.crawler.video.utils.HttpUtils; import top.yuchat.crawler.video.models.entity.ClassifyInfo; import top.yuchat.crawler.video.models.entity.MadouVideoInfo; import top.yuchat.crawler.video.models.mapper.MadouVideoMapper; +import top.yuchat.crawler.video.utils.HttpRequestComponent; import java.io.IOException; import java.nio.file.Files; @@ -56,8 +56,9 @@ public class MadouVideoService extends ServiceImpl { try { log.info("下载封面图片,名称:{},地址:{}", videoInfo.getTitle(), videoInfo.getCoverUrl()); - HttpUtils.download(videoInfo.getCoverUrl(), Paths.get(imgPath.toString(), videoInfo.getId().toString() + videoInfo.getCoverUrl().substring(videoInfo.getCoverUrl().lastIndexOf(".")))); + httpRequestComponent.download(videoInfo.getCoverUrl(), Paths.get(imgPath.toString(), videoInfo.getId().toString() + videoInfo.getCoverUrl().substring(videoInfo.getCoverUrl().lastIndexOf(".")))); log.info("开始下载:{},m3u8地址:{}", videoInfo.getTitle(), videoInfo.getM3u8Url()); - String result = HttpUtils.get(videoInfo.getM3u8Url()); + String result = httpRequestComponent.get(videoInfo.getM3u8Url()); Path path = Paths.get(downloadBasePath, "m3u8", videoInfo.getId().toString()); if (!Files.exists(path)) { Files.createDirectories(path); @@ -162,16 +163,15 @@ public class MadouVideoService extends ServiceImpl tss = Arrays.stream(result.split("\n")).filter(t -> t.contains(".ts")).toList(); int size = tss.size(); for (int i = 0; i < size; i++) { - String ts = tss.get(i); log.info("正在下载ts文件,已下载:{},共:{},当前进度:{}%", i, size, String.format("%.2f", (i * 100.0 / size))); Path tsPath = Paths.get(path.toString(), ts); - HttpUtils.download(videoInfo.getM3u8Url().replace("index.m3u8", ts), tsPath); + httpRequestComponent.download(videoInfo.getM3u8Url().replace("index.m3u8", ts), tsPath); } videoInfo.setM3u8(false); updateById(videoInfo); } catch (Exception e) { - log.error("下载失败,标题:{},失败原因:{},地址:{}", videoInfo.getTitle(), videoInfo.getId(), e.getMessage()); + log.error("下载失败,标题:{}, 失败原因:{}", videoInfo.getTitle(), e.getMessage(), e); } }); } diff --git a/src/main/java/top/yuchat/crawler/video/utils/HttpRequestComponent.java b/src/main/java/top/yuchat/crawler/video/utils/HttpRequestComponent.java new file mode 100644 index 0000000..cc40c2c --- /dev/null +++ b/src/main/java/top/yuchat/crawler/video/utils/HttpRequestComponent.java @@ -0,0 +1,113 @@ +package top.yuchat.crawler.video.utils; + +import lombok.extern.slf4j.Slf4j; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHost; +import org.apache.http.HttpStatus; +import org.apache.http.client.config.RequestConfig; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpGet; +import org.apache.http.config.Registry; +import org.apache.http.config.RegistryBuilder; +import org.apache.http.conn.socket.ConnectionSocketFactory; +import org.apache.http.conn.socket.PlainConnectionSocketFactory; +import org.apache.http.conn.ssl.NoopHostnameVerifier; +import org.apache.http.conn.ssl.SSLConnectionSocketFactory; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.DefaultHttpRequestRetryHandler; +import org.apache.http.impl.client.HttpClients; +import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; +import org.apache.http.ssl.SSLContextBuilder; +import org.apache.http.util.EntityUtils; +import org.springframework.stereotype.Component; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.security.KeyManagementException; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; + +@Slf4j +@Component +public class HttpRequestComponent { + private static final HttpHost PROXY = new HttpHost("172.20.0.1", 1080); + + private CloseableHttpClient closeableHttpClient; + private RequestConfig requestConfig; + private int socketTimeout = 5000; + private int connectTimeout = 5000; + private int connectRequestTimeout = 5000; + private int maxTotal = 500; + private int maxPerRoute = 120; + + public HttpRequestComponent() throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException { + init(); + createHttpClients(); + } + + public void createHttpClients() throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException { + SSLContextBuilder builder = new SSLContextBuilder(); + builder.loadTrustMaterial(null, (_, _) -> true); + SSLConnectionSocketFactory sslref = new SSLConnectionSocketFactory(builder.build(), NoopHostnameVerifier.INSTANCE); + Registry registry = RegistryBuilder.create().register("http", new PlainConnectionSocketFactory()).register("https", sslref).build(); + PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(registry); + cm.setMaxTotal(maxTotal); + cm.setDefaultMaxPerRoute(maxPerRoute); + closeableHttpClient = HttpClients.custom() + .setSSLSocketFactory(sslref) + .setConnectionManager(cm) + .setRetryHandler(new DefaultHttpRequestRetryHandler(5, true)) + .setConnectionManagerShared(true) + .build(); + } + + public void init() { + this.requestConfig = RequestConfig.custom() + .setSocketTimeout(socketTimeout) + .setConnectTimeout(connectTimeout) + .setConnectionRequestTimeout(connectRequestTimeout) +// .setProxy(PROXY) + .build(); + } + + public String get(String url) throws IOException { + HttpGet httpGet = new HttpGet(url); + httpGet.setConfig(requestConfig); + CloseableHttpResponse response = closeableHttpClient.execute(httpGet); + int statusCode = response.getStatusLine().getStatusCode(); + if (HttpStatus.SC_OK != statusCode) { + log.error("请求失败,状态码:{}", statusCode); + throw new RuntimeException("请求失败,状态码:" + statusCode); + } + HttpEntity entity = response.getEntity(); + return EntityUtils.toString(entity); + } + + public void download(String url, Path path) throws IOException { + HttpGet httpGet = new HttpGet(url); + httpGet.setConfig(requestConfig); + CloseableHttpResponse response = closeableHttpClient.execute(httpGet); + int statusCode = response.getStatusLine().getStatusCode(); + if (HttpStatus.SC_OK != statusCode) { + log.error("文件下载失败,状态码:{}", statusCode); + throw new RuntimeException("文件下载失败"); + } + HttpEntity entity = response.getEntity(); + + if (Files.exists(path)) { + long contentLength = entity.getContentLength(); + long size = Files.size(path); + log.warn("文件已存在,请求文件大小:{},本地文件大小:{}", contentLength, size); + if (size >= contentLength) { + log.warn("文件已存在,跳过下载,文件路径:{}", path); + response.close(); + return; + } + log.warn("文件不完整,删除重新下载,PATH:{}", path); + Files.delete(path); + } + Files.copy(entity.getContent(), path); + } + +} diff --git a/src/main/java/top/yuchat/crawler/video/utils/HttpUtils.java b/src/main/java/top/yuchat/crawler/video/utils/HttpUtils.java deleted file mode 100644 index c2468a9..0000000 --- a/src/main/java/top/yuchat/crawler/video/utils/HttpUtils.java +++ /dev/null @@ -1,53 +0,0 @@ -package top.yuchat.crawler.video.utils; - -import lombok.extern.slf4j.Slf4j; -import org.apache.http.HttpEntity; -import org.apache.http.HttpHost; -import org.apache.http.HttpStatus; -import org.apache.http.client.HttpClient; -import org.apache.http.client.config.RequestConfig; -import org.apache.http.client.methods.CloseableHttpResponse; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.impl.client.DefaultHttpClient; -import org.apache.http.impl.client.HttpClients; -import org.apache.http.impl.conn.DefaultProxyRoutePlanner; -import org.apache.http.util.EntityUtils; - -import java.io.IOException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; - -@Slf4j -public class HttpUtils { - private static final HttpHost PROXY = new HttpHost("172.20.0.1", 1080); - private static final RequestConfig CONFIG = RequestConfig.custom().setProxy(PROXY).build(); - private static final HttpClient HTTP_CLIENT = HttpClients.createDefault(); - - public static String get(String url) throws IOException { - HttpGet httpGet = new HttpGet(url); - httpGet.setConfig(CONFIG); - CloseableHttpResponse response = (CloseableHttpResponse) HTTP_CLIENT.execute(httpGet); - int statusCode = response.getStatusLine().getStatusCode(); - if (HttpStatus.SC_OK != statusCode) { - log.error("请求失败,状态码:{}", statusCode); - throw new RuntimeException("请求失败,状态码:" + statusCode); - } - HttpEntity entity = response.getEntity(); - return EntityUtils.toString(entity); - } - - public static void download(String url, Path path) throws IOException { - HttpGet httpGet = new HttpGet(url); - httpGet.setConfig(CONFIG); - CloseableHttpResponse response = (CloseableHttpResponse) HTTP_CLIENT.execute(httpGet); - int statusCode = response.getStatusLine().getStatusCode(); - if (HttpStatus.SC_OK != statusCode) { - log.error("文件下载失败,状态码:{}", statusCode); - throw new RuntimeException("文件下载失败"); - } - HttpEntity entity = response.getEntity(); - Files.copy(entity.getContent(), path); - } - -}