package com.ruoyi.common.html;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.util.stream.Collectors;

/**
 * Parses saved HTML question pages and prints the questions and options they contain.
 *
 * <p>Each regular file in the target folder is parsed with jsoup. The children of the first
 * element carrying the class {@code topic__type-body} are treated as questions: the text of the
 * question's {@code <legend>} is printed, followed by the text of every {@code <label>} found
 * inside the {@code <span>} elements of the first {@code topic__type-dry} element, if any.
 *
 * <p>A file is skipped (and counted as skipped) when it has no {@code topic__type-body} element,
 * or when any question lacks a {@code <legend>} whose {@code class} attribute is exactly
 * {@code topic__type-title}.
 */
public class AnalysisHTML {

    /** Folder scanned by the no-argument {@link #explainHTML()}. */
    private static final String DEFAULT_FOLDER_PATH = "E:\\pc2";

    /** Class of the container whose children are the individual questions. */
    private static final String BODY_CLASS = "topic__type-body";

    /** Class a question's legend must carry for the file to be processed. */
    private static final String TITLE_CLASS = "topic__type-title";

    /** Class of the element holding a question's options. */
    private static final String OPTIONS_CLASS = "topic__type-dry";

    /** Prefix written before every option in the printed option line. */
    private static final String OPTION_SEPARATOR = "------";

    /**
     * Scans the default folder; equivalent to {@code explainHTML(DEFAULT_FOLDER_PATH)}.
     *
     * @return always {@code null}
     * @throws IOException if the folder cannot be listed or a file cannot be read
     */
    public static String explainHTML() throws IOException {
        return explainHTML(DEFAULT_FOLDER_PATH);
    }

    /**
     * Parses every regular file directly inside {@code folderPath} and prints, per file, its
     * name, each question and each question's options. After all files are handled, prints the
     * number of questions seen and the number of skipped files, separated by four spaces.
     *
     * @param folderPath path of the folder containing the HTML files
     * @return always {@code null}; the result is only written to standard output (kept for
     *         compatibility with existing callers)
     * @throws IOException if the folder cannot be listed or a file cannot be read
     */
    public static String explainHTML(String folderPath) throws IOException {
        File[] files = new File(folderPath).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or an I/O error occurs.
            throw new IOException("Cannot list files in folder: " + folderPath);
        }

        int questionCount = 0;
        int skippedFiles = 0;
        for (File file : files) {
            if (!file.isFile()) {
                // Sub-directories and other non-regular entries cannot be parsed as HTML.
                continue;
            }
            System.out.println(file.getName());

            // A null charset lets jsoup detect the encoding from the BOM / meta tag and fall
            // back to UTF-8, instead of silently using the platform default charset.
            Document document = Jsoup.parse(file, (String) null);

            Elements bodies = document.body().getElementsByClass(BODY_CLASS);
            if (bodies.isEmpty()) {
                // No question container at all: nothing to extract from this file.
                skippedFiles++;
                continue;
            }

            Elements questions = bodies.get(0).children();
            if (!allHaveTitledLegend(questions)) {
                // NOTE(review): the original code created an unused File pointing at a
                // "noexplain" folder here, presumably intending to move skipped files there;
                // that was never implemented, so skipped files are only counted.
                skippedFiles++;
                continue;
            }

            for (Element question : questions) {
                questionCount++;
                printQuestion(question);
            }
        }
        System.out.println(questionCount + "    " + skippedFiles);

        return null;
    }

    /** Returns true when every question has a legend whose class is exactly the title class. */
    private static boolean allHaveTitledLegend(Elements questions) {
        for (Element question : questions) {
            Elements legends = question.getElementsByTag("legend");
            if (legends.isEmpty() || !TITLE_CLASS.equals(legends.attr("class"))) {
                return false;
            }
        }
        return true;
    }

    /** Prints the question text and, when an options element exists, the line of options. */
    private static void printQuestion(Element question) {
        String legend = question.getElementsByTag("legend").text();
        System.out.println("题目： " + legend);

        Elements optionBlocks = question.getElementsByClass(OPTIONS_CLASS);
        if (optionBlocks.isEmpty()) {
            return;
        }
        StringBuilder options = new StringBuilder();
        for (Element span : optionBlocks.get(0).getElementsByTag("span")) {
            for (Element label : span.getElementsByTag("label")) {
                options.append(OPTION_SEPARATOR).append(label.text());
            }
        }
        System.out.println("选项： " + options);
    }

    /**
     * Command-line entry point: scans the default folder.
     *
     * @param args unused
     * @throws IOException if the folder cannot be listed or a file cannot be read
     */
    public static void main(String[] args) throws IOException {
        explainHTML();
    }
}