优化下载线程,提高下载速度

This commit is contained in:
燕陇琪 2024-10-18 22:23:05 +08:00
parent 54f605382b
commit edb308a495
3 changed files with 122 additions and 62 deletions

View File

@ -12,10 +12,10 @@ import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import top.yuchat.crawler.video.utils.HttpUtils;
import top.yuchat.crawler.video.models.entity.ClassifyInfo;
import top.yuchat.crawler.video.models.entity.MadouVideoInfo;
import top.yuchat.crawler.video.models.mapper.MadouVideoMapper;
import top.yuchat.crawler.video.utils.HttpRequestComponent;
import java.io.IOException;
import java.nio.file.Files;
@ -56,8 +56,9 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
public String downloadBasePath;
private final ClassifyInfoService classifyInfoService;
private final HttpRequestComponent httpRequestComponent;
ExecutorService scheduledThreadPool = Executors.newScheduledThreadPool(16);
ExecutorService scheduledThreadPool = Executors.newScheduledThreadPool(72);
public void getVideoList() {
@ -83,7 +84,7 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
public void saveVideo(String url, String classify) throws IOException {
String html = HttpUtils.get(url);
String html = httpRequestComponent.get(url);
Document document = Jsoup.parse(html);
Elements imgPs = document.getElementsByClass("img");
@ -115,7 +116,7 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
for (MadouVideoInfo madouVideoInfo : list) {
String url = BASE_URL + madouVideoInfo.getM3u8Url();
try {
String html = HttpUtils.get(url);
String html = httpRequestComponent.get(url);
Document document = Jsoup.parse(html);
Element bofangBox = document.getElementById("bofang_box");
@ -148,10 +149,10 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
scheduledThreadPool.submit(() -> {
try {
log.info("下载封面图片,名称:{},地址:{}", videoInfo.getTitle(), videoInfo.getCoverUrl());
HttpUtils.download(videoInfo.getCoverUrl(), Paths.get(imgPath.toString(), videoInfo.getId().toString() + videoInfo.getCoverUrl().substring(videoInfo.getCoverUrl().lastIndexOf("."))));
httpRequestComponent.download(videoInfo.getCoverUrl(), Paths.get(imgPath.toString(), videoInfo.getId().toString() + videoInfo.getCoverUrl().substring(videoInfo.getCoverUrl().lastIndexOf("."))));
log.info("开始下载:{}m3u8地址{}", videoInfo.getTitle(), videoInfo.getM3u8Url());
String result = HttpUtils.get(videoInfo.getM3u8Url());
String result = httpRequestComponent.get(videoInfo.getM3u8Url());
Path path = Paths.get(downloadBasePath, "m3u8", videoInfo.getId().toString());
if (!Files.exists(path)) {
Files.createDirectories(path);
@ -162,16 +163,15 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
List<String> tss = Arrays.stream(result.split("\n")).filter(t -> t.contains(".ts")).toList();
int size = tss.size();
for (int i = 0; i < size; i++) {
String ts = tss.get(i);
log.info("正在下载ts文件已下载{},共:{},当前进度:{}%", i, size, String.format("%.2f", (i * 100.0 / size)));
Path tsPath = Paths.get(path.toString(), ts);
HttpUtils.download(videoInfo.getM3u8Url().replace("index.m3u8", ts), tsPath);
httpRequestComponent.download(videoInfo.getM3u8Url().replace("index.m3u8", ts), tsPath);
}
videoInfo.setM3u8(false);
updateById(videoInfo);
} catch (Exception e) {
log.error("下载失败,标题:{},失败原因:{},地址{}", videoInfo.getTitle(), videoInfo.getId(), e.getMessage());
log.error("下载失败,标题:{}, 失败原因{}", videoInfo.getTitle(), e.getMessage(), e);
}
});
}

View File

@ -0,0 +1,113 @@
package top.yuchat.crawler.video.utils;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpStatus;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.NoopHostnameVerifier;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.ssl.SSLContextBuilder;
import org.apache.http.util.EntityUtils;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.security.KeyManagementException;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
/**
 * Spring component wrapping a single pooled Apache {@link CloseableHttpClient}
 * used for page fetches and file downloads.
 *
 * <p>The client is built once in the constructor on top of a
 * {@link PoolingHttpClientConnectionManager}. Every response obtained from it is
 * closed before the calling method returns, so leased connections always go back
 * to (or are evicted from) the pool, including on error paths.
 *
 * <p>NOTE(review): the SSL setup trusts every certificate and disables hostname
 * verification, so HTTPS traffic is not authenticated. Confirm this is intended.
 */
@Slf4j
@Component
public class HttpRequestComponent {

    /** Proxy endpoint; currently unused because the setProxy call in {@link #init()} is commented out. */
    private static final HttpHost PROXY = new HttpHost("172.20.0.1", 1080);

    private CloseableHttpClient closeableHttpClient;

    private RequestConfig requestConfig;

    /** Socket (read) timeout in milliseconds. */
    private int socketTimeout = 5000;

    /** Connection establishment timeout in milliseconds. */
    private int connectTimeout = 5000;

    /** Maximum wait, in milliseconds, for a connection to be leased from the pool. */
    private int connectRequestTimeout = 5000;

    /** Maximum number of pooled connections across all routes. */
    private int maxTotal = 500;

    /** Maximum number of pooled connections per route. */
    private int maxPerRoute = 120;

    /**
     * Builds the request configuration and the pooled HTTP client.
     *
     * @throws NoSuchAlgorithmException if the SSL context cannot be created
     * @throws KeyStoreException        if the trust material cannot be loaded
     * @throws KeyManagementException   if the SSL context cannot be initialised
     */
    public HttpRequestComponent() throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {
        init();
        createHttpClients();
    }

    /**
     * Creates the pooled client: trust-all SSL, no hostname verification,
     * {@code maxTotal}/{@code maxPerRoute} pool limits and up to 5 retries
     * (including requests that were already sent).
     *
     * @throws NoSuchAlgorithmException if the SSL context cannot be created
     * @throws KeyStoreException        if the trust material cannot be loaded
     * @throws KeyManagementException   if the SSL context cannot be initialised
     */
    public void createHttpClients() throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {
        SSLContextBuilder builder = new SSLContextBuilder();
        // Accept every certificate chain regardless of issuer.
        builder.loadTrustMaterial(null, (_, _) -> true);
        SSLConnectionSocketFactory sslref =
                new SSLConnectionSocketFactory(builder.build(), NoopHostnameVerifier.INSTANCE);
        Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
                .register("http", new PlainConnectionSocketFactory())
                .register("https", sslref)
                .build();

        PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(registry);
        cm.setMaxTotal(maxTotal);
        cm.setDefaultMaxPerRoute(maxPerRoute);

        closeableHttpClient = HttpClients.custom()
                .setSSLSocketFactory(sslref)
                .setConnectionManager(cm)
                .setRetryHandler(new DefaultHttpRequestRetryHandler(5, true))
                .setConnectionManagerShared(true)
                .build();
    }

    /** Builds the per-request configuration from the timeout fields. */
    public void init() {
        this.requestConfig = RequestConfig.custom()
                .setSocketTimeout(socketTimeout)
                .setConnectTimeout(connectTimeout)
                .setConnectionRequestTimeout(connectRequestTimeout)
                // .setProxy(PROXY)
                .build();
    }

    /**
     * Performs a GET request and returns the response body as text.
     *
     * @param url absolute URL to fetch
     * @return the response body decoded by {@link EntityUtils#toString(HttpEntity)}
     * @throws IOException      if the request or reading the body fails
     * @throws RuntimeException if the response status is not 200
     */
    public String get(String url) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        httpGet.setConfig(requestConfig);
        // try-with-resources: the leased connection is released on every exit path.
        try (CloseableHttpResponse response = closeableHttpClient.execute(httpGet)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (HttpStatus.SC_OK != statusCode) {
                // Drain the error body so the connection can be reused instead of discarded.
                EntityUtils.consumeQuietly(response.getEntity());
                log.error("请求失败,状态码:{}", statusCode);
                throw new RuntimeException("请求失败,状态码:" + statusCode);
            }
            return EntityUtils.toString(response.getEntity());
        }
    }

    /**
     * Downloads {@code url} into {@code path}.
     *
     * <p>If {@code path} already exists and is at least as large as the advertised
     * content length, the download is skipped. If it is smaller, or the server does
     * not advertise a length, the existing file is deleted and fetched again.
     *
     * @param url  absolute URL of the file
     * @param path destination file
     * @throws IOException      if the request or writing the file fails
     * @throws RuntimeException if the response status is not 200
     */
    public void download(String url, Path path) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        httpGet.setConfig(requestConfig);
        // try-with-resources: the leased connection is released on every exit path.
        try (CloseableHttpResponse response = closeableHttpClient.execute(httpGet)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (HttpStatus.SC_OK != statusCode) {
                // Drain the error body so the connection can be reused instead of discarded.
                EntityUtils.consumeQuietly(response.getEntity());
                log.error("文件下载失败,状态码:{}", statusCode);
                throw new RuntimeException("文件下载失败");
            }
            HttpEntity entity = response.getEntity();
            if (Files.exists(path)) {
                long contentLength = entity.getContentLength();
                long size = Files.size(path);
                log.warn("文件已存在,请求文件大小:{},本地文件大小:{}", contentLength, size);
                // A negative length means "unknown"; the local file cannot be verified
                // as complete in that case, so it must not be treated as up to date.
                if (contentLength >= 0 && size >= contentLength) {
                    log.warn("文件已存在,跳过下载,文件路径:{}", path);
                    return;
                }
                log.warn("文件不完整删除重新下载PATH{}", path);
                Files.delete(path);
            }
            Files.copy(entity.getContent(), path);
        }
    }
}

View File

@ -1,53 +0,0 @@
package top.yuchat.crawler.video.utils;
import lombok.extern.slf4j.Slf4j;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpStatus;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.DefaultProxyRoutePlanner;
import org.apache.http.util.EntityUtils;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
 * Static HTTP helpers that send GET requests through a fixed proxy using one
 * shared default client.
 *
 * <p>Every response is closed before the method returns so that the underlying
 * connection is released on both success and failure paths.
 */
@Slf4j
public class HttpUtils {

    /** Proxy applied to every request via {@link #CONFIG}. */
    private static final HttpHost PROXY = new HttpHost("172.20.0.1", 1080);

    private static final RequestConfig CONFIG = RequestConfig.custom().setProxy(PROXY).build();

    private static final HttpClient HTTP_CLIENT = HttpClients.createDefault();

    /**
     * Performs a GET request and returns the response body as text.
     *
     * @param url absolute URL to fetch
     * @return the response body decoded by {@link EntityUtils#toString(HttpEntity)}
     * @throws IOException      if the request or reading the body fails
     * @throws RuntimeException if the response status is not 200
     */
    public static String get(String url) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        httpGet.setConfig(CONFIG);
        // The client is created by HttpClients.createDefault(), whose responses are closeable.
        try (CloseableHttpResponse response = (CloseableHttpResponse) HTTP_CLIENT.execute(httpGet)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (HttpStatus.SC_OK != statusCode) {
                // Drain the error body so the connection can be reused instead of discarded.
                EntityUtils.consumeQuietly(response.getEntity());
                log.error("请求失败,状态码:{}", statusCode);
                throw new RuntimeException("请求失败,状态码:" + statusCode);
            }
            return EntityUtils.toString(response.getEntity());
        }
    }

    /**
     * Downloads {@code url} into {@code path}.
     *
     * @param url  absolute URL of the file
     * @param path destination file; {@link Files#copy} fails if it already exists
     * @throws IOException      if the request or writing the file fails
     * @throws RuntimeException if the response status is not 200
     */
    public static void download(String url, Path path) throws IOException {
        HttpGet httpGet = new HttpGet(url);
        httpGet.setConfig(CONFIG);
        try (CloseableHttpResponse response = (CloseableHttpResponse) HTTP_CLIENT.execute(httpGet)) {
            int statusCode = response.getStatusLine().getStatusCode();
            if (HttpStatus.SC_OK != statusCode) {
                // Drain the error body so the connection can be reused instead of discarded.
                EntityUtils.consumeQuietly(response.getEntity());
                log.error("文件下载失败,状态码:{}", statusCode);
                throw new RuntimeException("文件下载失败");
            }
            HttpEntity entity = response.getEntity();
            Files.copy(entity.getContent(), path);
        }
    }
}