需要配合html2md工具 https://eqishare.com/technology/1236.html
需求:批量把博客文章转成md文档。
代码仅经过简单调试,可以成功运行,请根据实际需要自行优化后使用。
在 pom.xml 中引入依赖:
<!--引入HTML转Markdown的插件--> <dependency> <groupId>io.github.furstenheim</groupId> <artifactId>copy_down</artifactId> <version>1.0</version> </dependency>
代码:
package com.luchao.untils;
import com.alibaba.fastjson.JSONObject;
import io.github.furstenheim.CopyDown;
import io.github.furstenheim.Options;
import io.github.furstenheim.OptionsBuilder;
import java.io.*;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
/**
 * Batch-converts blog posts to Markdown files.
 *
 * <p>Reads blog URLs line by line from a text file. For each URL it requests the page HTML
 * from the {@code getUrlHtml} endpoint, converts the HTML to Markdown with CopyDown, posts
 * the Markdown to the {@code getMdFile} endpoint and downloads the file named by the
 * {@code path} field of the response to local disk.
 *
 * @author ludeng
 * @date 2024-07-25 11:34
 */
public class test2 {

    /** Endpoint that returns a JSON document for a page; the target URL is appended as the query value. */
    private static final String FETCH_HTML_ENDPOINT = "https://www.helloworld.net/getUrlHtml?url=";

    /** Endpoint that receives the Markdown text and answers with a JSON document containing {@code path}. */
    private static final String MD_FILE_ENDPOINT = "https://www.helloworld.net/getMdFile";

    /** File name used when an article has no usable title. */
    private static final String FALLBACK_FILE_NAME = "untitled";

    /**
     * Reads the links file and converts every non-blank line.
     *
     * <p>A failure on one URL is reported and the loop continues with the next one, so a single
     * bad article does not abort the whole batch.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        test2 test = new test2();
        String filePath = "D:\\alllinks.txt"; // replace with the actual path of the links file
        // Decode explicitly as UTF-8 instead of relying on the platform default charset.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(filePath), StandardCharsets.UTF_8))) {
            String line;
            while ((line = br.readLine()) != null) {
                String blogUrl = line;
                // Editors may prepend a UTF-8 byte-order mark; it must not become part of the first URL.
                if (!blogUrl.isEmpty() && blogUrl.charAt(0) == '\uFEFF') {
                    blogUrl = blogUrl.substring(1);
                }
                blogUrl = blogUrl.trim();
                if (blogUrl.isEmpty()) {
                    continue; // skip blank lines instead of requesting an empty URL
                }
                System.out.println(blogUrl);
                try {
                    test.getmd(blogUrl);
                } catch (RuntimeException e) {
                    // Batch boundary: e.g. a malformed JSON response must only fail this one URL.
                    System.out.println("处理链接时出错:" + blogUrl + " - " + e);
                }
            }
        } catch (IOException e) {
            System.out.println("读取文件时出错:" + e.getMessage());
        }
    }

    /**
     * Manual smoke test for the GET request: fetches one hard-coded page and prints the raw response.
     */
    private void testGet() {
        String gurl = "https://www.xxxx.com/video/1232.html";
        try {
            String result = HttpClientUtils.get(buildFetchUrl(gurl));
            System.out.println(result);
        } catch (IOException e) {
            System.out.println("请求出错:" + gurl + " - " + e);
        }
    }

    /**
     * Converts a single blog post to a Markdown file on disk.
     *
     * <p>The post is skipped (with a message) when the fetch response is empty, its
     * {@code code} field is not {@code "1"}, it carries no {@code html}, or the second
     * response carries no {@code path}.
     *
     * @param blogUrl URL of the blog post to convert
     */
    private void getmd(String blogUrl) {
        try {
            String result = HttpClientUtils.get(buildFetchUrl(blogUrl));
            System.out.println(result);
            JSONObject page = JSONObject.parseObject(result);

            // "1" is the value the original code treated as success.
            if (!"1".equals(stringField(page, "code"))) {
                System.out.println("获取页面失败,已跳过:" + blogUrl);
                return;
            }
            String html = stringField(page, "html");
            if (html == null) {
                System.out.println("响应中没有html内容,已跳过:" + blogUrl);
                return;
            }

            String markdown = htmlTansToMarkdown(html);

            HashMap<String, String> map = new HashMap<>();
            map.put("md", markdown);
            map.put("url", "https://www.helloworld.net");
            String result1 = HttpClientUtils.post(MD_FILE_ENDPOINT, map);
            System.out.println(result1);

            String path = stringField(JSONObject.parseObject(result1), "path");
            if (path == null) {
                System.out.println("响应中没有文件路径,已跳过:" + blogUrl);
                return;
            }
            // A missing title is tolerated here; saveFile falls back to a default file name.
            saveFile(path, stringField(page, "title"));
        } catch (IOException e) {
            System.out.println("转换出错:" + blogUrl + " - " + e);
        }
    }

    /**
     * Builds the request URL for the fetch endpoint.
     *
     * <p>The target URL is percent-encoded so that characters such as {@code &}, {@code #} or
     * {@code ?} inside it are not interpreted as part of the outer request.
     *
     * @param blogUrl URL of the page to fetch
     * @return the full request URL
     * @throws UnsupportedEncodingException if UTF-8 is not supported by the runtime
     */
    private static String buildFetchUrl(String blogUrl) throws UnsupportedEncodingException {
        return FETCH_HTML_ENDPOINT + URLEncoder.encode(blogUrl, "UTF-8");
    }

    /**
     * Reads a field from a JSON object as a string.
     *
     * @param json the parsed object, may be {@code null}
     * @param key  the field name
     * @return the field's {@code toString()} value, or {@code null} when the object or field is absent
     */
    private static String stringField(JSONObject json, String key) {
        if (json == null) {
            return null;
        }
        Object value = json.get(key);
        return value == null ? null : value.toString();
    }

    /**
     * Converts an HTML string to Markdown using CopyDown.
     *
     * @param htmlStr the HTML to convert
     * @return the Markdown text produced by the converter
     */
    public static String htmlTansToMarkdown(String htmlStr) {
        // The converter's "br" option is set to "-"; further options can be chained here.
        Options options = OptionsBuilder.anOptions()
                .withBr("-")
                .build();
        return new CopyDown(options).convert(htmlStr);
    }

    /**
     * Downloads a remote file and stores it as {@code D:\<name>.md}.
     *
     * <p>Characters that Windows does not allow in file names are replaced in {@code newName}
     * before it is used; a {@code null} or empty name falls back to a default. I/O errors are
     * reported on standard output and not rethrown.
     *
     * @param urlstr  URL of the file to download
     * @param newName desired file name without extension (typically the article title)
     */
    public static void saveFile(String urlstr, String newName) {
        String saveFilePath = "D:\\" + sanitizeFileName(newName) + ".md"; // replace with the actual target directory
        try (InputStream in = new BufferedInputStream(new URL(urlstr).openStream());
             OutputStream out = new FileOutputStream(saveFilePath)) {
            byte[] buffer = new byte[8192];
            int bytesRead;
            while ((bytesRead = in.read(buffer)) != -1) {
                out.write(buffer, 0, bytesRead);
            }
            System.out.println("文件下载成功:" + newName);
        } catch (IOException e) {
            System.out.println("下载文件时出错:" + e.getMessage());
        }
    }

    /**
     * Makes a title safe to use as a Windows file name.
     *
     * @param name the raw name, may be {@code null}
     * @return the name with {@code \ / : * ? " < > |} and control characters replaced by
     *         {@code _}, or a fallback name when nothing usable remains
     */
    private static String sanitizeFileName(String name) {
        if (name == null) {
            return FALLBACK_FILE_NAME;
        }
        String cleaned = name.replaceAll("[\\\\/:*?\"<>|\\p{Cntrl}]", "_").trim();
        return cleaned.isEmpty() ? FALLBACK_FILE_NAME : cleaned;
    }
}






