优化下载线程,提高下载速度
This commit is contained in:
parent
54f605382b
commit
edb308a495
@ -12,10 +12,10 @@ import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import top.yuchat.crawler.video.utils.HttpUtils;
|
||||
import top.yuchat.crawler.video.models.entity.ClassifyInfo;
|
||||
import top.yuchat.crawler.video.models.entity.MadouVideoInfo;
|
||||
import top.yuchat.crawler.video.models.mapper.MadouVideoMapper;
|
||||
import top.yuchat.crawler.video.utils.HttpRequestComponent;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
@ -56,8 +56,9 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
|
||||
public String downloadBasePath;
|
||||
|
||||
private final ClassifyInfoService classifyInfoService;
|
||||
private final HttpRequestComponent httpRequestComponent;
|
||||
|
||||
ExecutorService scheduledThreadPool = Executors.newScheduledThreadPool(16);
|
||||
ExecutorService scheduledThreadPool = Executors.newScheduledThreadPool(72);
|
||||
|
||||
|
||||
public void getVideoList() {
|
||||
@ -83,7 +84,7 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
|
||||
|
||||
|
||||
public void saveVideo(String url, String classify) throws IOException {
|
||||
String html = HttpUtils.get(url);
|
||||
String html = httpRequestComponent.get(url);
|
||||
Document document = Jsoup.parse(html);
|
||||
|
||||
Elements imgPs = document.getElementsByClass("img");
|
||||
@ -115,7 +116,7 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
|
||||
for (MadouVideoInfo madouVideoInfo : list) {
|
||||
String url = BASE_URL + madouVideoInfo.getM3u8Url();
|
||||
try {
|
||||
String html = HttpUtils.get(url);
|
||||
String html = httpRequestComponent.get(url);
|
||||
|
||||
Document document = Jsoup.parse(html);
|
||||
Element bofangBox = document.getElementById("bofang_box");
|
||||
@ -148,10 +149,10 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
|
||||
scheduledThreadPool.submit(() -> {
|
||||
try {
|
||||
log.info("下载封面图片,名称:{},地址:{}", videoInfo.getTitle(), videoInfo.getCoverUrl());
|
||||
HttpUtils.download(videoInfo.getCoverUrl(), Paths.get(imgPath.toString(), videoInfo.getId().toString() + videoInfo.getCoverUrl().substring(videoInfo.getCoverUrl().lastIndexOf("."))));
|
||||
httpRequestComponent.download(videoInfo.getCoverUrl(), Paths.get(imgPath.toString(), videoInfo.getId().toString() + videoInfo.getCoverUrl().substring(videoInfo.getCoverUrl().lastIndexOf("."))));
|
||||
|
||||
log.info("开始下载:{},m3u8地址:{}", videoInfo.getTitle(), videoInfo.getM3u8Url());
|
||||
String result = HttpUtils.get(videoInfo.getM3u8Url());
|
||||
String result = httpRequestComponent.get(videoInfo.getM3u8Url());
|
||||
Path path = Paths.get(downloadBasePath, "m3u8", videoInfo.getId().toString());
|
||||
if (!Files.exists(path)) {
|
||||
Files.createDirectories(path);
|
||||
@ -162,16 +163,15 @@ public class MadouVideoService extends ServiceImpl<MadouVideoMapper, MadouVideoI
|
||||
List<String> tss = Arrays.stream(result.split("\n")).filter(t -> t.contains(".ts")).toList();
|
||||
int size = tss.size();
|
||||
for (int i = 0; i < size; i++) {
|
||||
|
||||
String ts = tss.get(i);
|
||||
log.info("正在下载ts文件,已下载:{},共:{},当前进度:{}%", i, size, String.format("%.2f", (i * 100.0 / size)));
|
||||
Path tsPath = Paths.get(path.toString(), ts);
|
||||
HttpUtils.download(videoInfo.getM3u8Url().replace("index.m3u8", ts), tsPath);
|
||||
httpRequestComponent.download(videoInfo.getM3u8Url().replace("index.m3u8", ts), tsPath);
|
||||
}
|
||||
videoInfo.setM3u8(false);
|
||||
updateById(videoInfo);
|
||||
} catch (Exception e) {
|
||||
log.error("下载失败,标题:{},失败原因:{},地址:{}", videoInfo.getTitle(), videoInfo.getId(), e.getMessage());
|
||||
log.error("下载失败,标题:{}, 失败原因:{}", videoInfo.getTitle(), e.getMessage(), e);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@ -0,0 +1,113 @@
|
||||
package top.yuchat.crawler.video.utils;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.HttpHost;
|
||||
import org.apache.http.HttpStatus;
|
||||
import org.apache.http.client.config.RequestConfig;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.config.Registry;
|
||||
import org.apache.http.config.RegistryBuilder;
|
||||
import org.apache.http.conn.socket.ConnectionSocketFactory;
|
||||
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
|
||||
import org.apache.http.conn.ssl.NoopHostnameVerifier;
|
||||
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
|
||||
import org.apache.http.impl.client.CloseableHttpClient;
|
||||
import org.apache.http.impl.client.DefaultHttpRequestRetryHandler;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
|
||||
import org.apache.http.ssl.SSLContextBuilder;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.security.KeyManagementException;
|
||||
import java.security.KeyStoreException;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
public class HttpRequestComponent {
|
||||
private static final HttpHost PROXY = new HttpHost("172.20.0.1", 1080);
|
||||
|
||||
private CloseableHttpClient closeableHttpClient;
|
||||
private RequestConfig requestConfig;
|
||||
private int socketTimeout = 5000;
|
||||
private int connectTimeout = 5000;
|
||||
private int connectRequestTimeout = 5000;
|
||||
private int maxTotal = 500;
|
||||
private int maxPerRoute = 120;
|
||||
|
||||
public HttpRequestComponent() throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {
|
||||
init();
|
||||
createHttpClients();
|
||||
}
|
||||
|
||||
public void createHttpClients() throws NoSuchAlgorithmException, KeyStoreException, KeyManagementException {
|
||||
SSLContextBuilder builder = new SSLContextBuilder();
|
||||
builder.loadTrustMaterial(null, (_, _) -> true);
|
||||
SSLConnectionSocketFactory sslref = new SSLConnectionSocketFactory(builder.build(), NoopHostnameVerifier.INSTANCE);
|
||||
Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create().register("http", new PlainConnectionSocketFactory()).register("https", sslref).build();
|
||||
PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(registry);
|
||||
cm.setMaxTotal(maxTotal);
|
||||
cm.setDefaultMaxPerRoute(maxPerRoute);
|
||||
closeableHttpClient = HttpClients.custom()
|
||||
.setSSLSocketFactory(sslref)
|
||||
.setConnectionManager(cm)
|
||||
.setRetryHandler(new DefaultHttpRequestRetryHandler(5, true))
|
||||
.setConnectionManagerShared(true)
|
||||
.build();
|
||||
}
|
||||
|
||||
public void init() {
|
||||
this.requestConfig = RequestConfig.custom()
|
||||
.setSocketTimeout(socketTimeout)
|
||||
.setConnectTimeout(connectTimeout)
|
||||
.setConnectionRequestTimeout(connectRequestTimeout)
|
||||
// .setProxy(PROXY)
|
||||
.build();
|
||||
}
|
||||
|
||||
public String get(String url) throws IOException {
|
||||
HttpGet httpGet = new HttpGet(url);
|
||||
httpGet.setConfig(requestConfig);
|
||||
CloseableHttpResponse response = closeableHttpClient.execute(httpGet);
|
||||
int statusCode = response.getStatusLine().getStatusCode();
|
||||
if (HttpStatus.SC_OK != statusCode) {
|
||||
log.error("请求失败,状态码:{}", statusCode);
|
||||
throw new RuntimeException("请求失败,状态码:" + statusCode);
|
||||
}
|
||||
HttpEntity entity = response.getEntity();
|
||||
return EntityUtils.toString(entity);
|
||||
}
|
||||
|
||||
public void download(String url, Path path) throws IOException {
|
||||
HttpGet httpGet = new HttpGet(url);
|
||||
httpGet.setConfig(requestConfig);
|
||||
CloseableHttpResponse response = closeableHttpClient.execute(httpGet);
|
||||
int statusCode = response.getStatusLine().getStatusCode();
|
||||
if (HttpStatus.SC_OK != statusCode) {
|
||||
log.error("文件下载失败,状态码:{}", statusCode);
|
||||
throw new RuntimeException("文件下载失败");
|
||||
}
|
||||
HttpEntity entity = response.getEntity();
|
||||
|
||||
if (Files.exists(path)) {
|
||||
long contentLength = entity.getContentLength();
|
||||
long size = Files.size(path);
|
||||
log.warn("文件已存在,请求文件大小:{},本地文件大小:{}", contentLength, size);
|
||||
if (size >= contentLength) {
|
||||
log.warn("文件已存在,跳过下载,文件路径:{}", path);
|
||||
response.close();
|
||||
return;
|
||||
}
|
||||
log.warn("文件不完整,删除重新下载,PATH:{}", path);
|
||||
Files.delete(path);
|
||||
}
|
||||
Files.copy(entity.getContent(), path);
|
||||
}
|
||||
|
||||
}
|
||||
@ -1,53 +0,0 @@
|
||||
package top.yuchat.crawler.video.utils;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.HttpHost;
|
||||
import org.apache.http.HttpStatus;
|
||||
import org.apache.http.client.HttpClient;
|
||||
import org.apache.http.client.config.RequestConfig;
|
||||
import org.apache.http.client.methods.CloseableHttpResponse;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.impl.client.DefaultHttpClient;
|
||||
import org.apache.http.impl.client.HttpClients;
|
||||
import org.apache.http.impl.conn.DefaultProxyRoutePlanner;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
@Slf4j
|
||||
public class HttpUtils {
|
||||
private static final HttpHost PROXY = new HttpHost("172.20.0.1", 1080);
|
||||
private static final RequestConfig CONFIG = RequestConfig.custom().setProxy(PROXY).build();
|
||||
private static final HttpClient HTTP_CLIENT = HttpClients.createDefault();
|
||||
|
||||
public static String get(String url) throws IOException {
|
||||
HttpGet httpGet = new HttpGet(url);
|
||||
httpGet.setConfig(CONFIG);
|
||||
CloseableHttpResponse response = (CloseableHttpResponse) HTTP_CLIENT.execute(httpGet);
|
||||
int statusCode = response.getStatusLine().getStatusCode();
|
||||
if (HttpStatus.SC_OK != statusCode) {
|
||||
log.error("请求失败,状态码:{}", statusCode);
|
||||
throw new RuntimeException("请求失败,状态码:" + statusCode);
|
||||
}
|
||||
HttpEntity entity = response.getEntity();
|
||||
return EntityUtils.toString(entity);
|
||||
}
|
||||
|
||||
public static void download(String url, Path path) throws IOException {
|
||||
HttpGet httpGet = new HttpGet(url);
|
||||
httpGet.setConfig(CONFIG);
|
||||
CloseableHttpResponse response = (CloseableHttpResponse) HTTP_CLIENT.execute(httpGet);
|
||||
int statusCode = response.getStatusLine().getStatusCode();
|
||||
if (HttpStatus.SC_OK != statusCode) {
|
||||
log.error("文件下载失败,状态码:{}", statusCode);
|
||||
throw new RuntimeException("文件下载失败");
|
||||
}
|
||||
HttpEntity entity = response.getEntity();
|
||||
Files.copy(entity.getContent(), path);
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user