| | |
| | | import java.nio.file.Paths; |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | import java.util.regex.Matcher; |
| | | import java.util.regex.Pattern; |
| | | |
| | | public class pctest { |
| | | |
| | | public static void main(String[] args) { |
| | | String str = null; |
| | | // for (int i = 51; i <= 189; i++) { |
| | | // System.out.print(i + " "); |
| | | try { |
| | | // 待匹配的字符串 |
| | | String sentence = "135465465好46546"; |
| | | |
| | | // long l = System.currentTimeMillis(); |
| | | //// str = "2263œ189œœ232105105§胸部肿瘤科日间病房患者<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§11¤232334680§胸部肿瘤科患者出院<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§19¤232313714§胸部肿瘤科食管癌患者出院<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§20¤232294126§胸部肿瘤科肺癌患者出院<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§22¤232231895§TKA术后<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§16¤232349695§<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>1月§29¤232303115§高糖<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>工作培训试题§16¤232295017§肿瘤<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>登记培训测试题§15¤232233427§老年健康监测队列<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>培训测试§10¤232215217§24病区IBD患者<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>问卷(8.22-8.28)§20¤232169849§2023.8.25小金人俱乐部MDT门诊<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§9¤232173705§<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>信息反馈表§6¤"; |
| | | //// str = HttpUtils.sendGet("https://www.wjx.cn/handler/Search.ashx?input=%E9%9A%8F%E8%AE%BF&qc=&cp=" + i + "&nw=1&qt=1&accurate=0&t=" + l); |
| | | // while (true) { |
| | | // if (str.contains("<font")) { |
| | | // str = str.substring(0, str.indexOf("<font")) + str.substring(str.indexOf("</font>") + 7); |
| | | // } else { |
| | | //// System.out.println(str); |
| | | // 定义正则表达式 |
| | | // String regex = ".*吃了.*早上好.*"; |
| | | // String regex = ".*吃了.*|.*早上好.*"; |
| | | // String regex = "^(?!.*很好,吃了吗).*|.*没吃.*早上好.*医院.*"; |
| | | String regex = "^(?!.别|不可|不可靠)(.(好|幸福)+.*)$"; |
| | | |
| | | List<String> list = new ArrayList<>(); |
| | | String folderPath = "E:/pc"; |
| | | try { |
| | | Files.walk(Paths.get(folderPath)).filter(Files::isRegularFile).forEach(path -> list.add(path.getFileName().toString())); |
| | | } catch (IOException e) { |
| | | System.out.println("An error occurred while traversing the folder."); |
| | | e.printStackTrace(); |
| | | } |
| | | // 编译正则表达式 |
| | | Pattern pattern = Pattern.compile(regex); |
| | | |
| | | for (String name : list) { |
| | | name = name.substring(0, name.indexOf(".html")); |
| | | // String sd = "https://www.wjx.cn/xz/" + name + ".aspx"; |
| | | // String content = HttpUtils.sendGet(sd, "review=1&u_atoken=31d3a0e6-e00e-483c-b956-b6177972e734&u_asession=01691hXKO9p1D59fHgd0NAIGdr-8HrPakCUS6X_v_Z43ohuMPPQJnwrrPl3mly-UFXYrO71Pz35K7wOrV6v8t3g9sq8AL43dpOnCClYrgFm6o&u_asig=052KZ_HPijdQddSitCTcvhNlwy7cGO2t46JgQGTyclFPT7xwZjoK0YBNED-DbuN5gugFYO36lFqDfxMpnRFufeLFk7BSuILL02zWW0QNCM-NWgLElQ9wTw7yMg4BFlXpIhOVzkXl1W45ZWoBPny57uZ3WVmTrcdvNBGM1L1rWev0UwpA9oQuqX4mfRyWspmK8gksmHjM0JOodanL5-M1Qs1Srh04GpT7zItRH0TxDThMR15LDAE7pTBPfvFUyoAgUl5gGKGQH-vEmN2Ae1t4GLLQkS6fjps_twKYxR-u2135jY94r_LXIIil3Y3aVPRGAe&u_aref=gk2k7DQa4vfdgaFwA4iVd7KUI4Q%3D", Constants.UTF8); |
| | | // String sd = "https://www.wjx.cn/xz/214877887.aspx?review=1&u_atoken=7ea94b7e-ff2c-4f9f-a16a-2377e7b932f7&u_asession=01mUmaPv86XmI35R93EFzW4vAn-_bjd_hg8O3aV3Qj_hY4LTkzoq3vcQyaBAt_bXG0L3UvT1OC8yhlQ_Vrlj1nD9sq8AL43dpOnCClYrgFm6o&u_asig=05qqfmDpV5jnzQ3zaOR-kKvhCvox19kVoC-lC8wI68PnJ7lkckawOfhM2Sz6TSlLyxhPCLiM-UkS5JtWfnNrZeTMme2Xl4_nh34yljc8iiZeJybY6mcSFGRriqnFvQfUeuCvV5odDJuWRnGyKMRbx_RypafWqYj328He90miD2j6an7QQBtuwLgtjfNI6ktmk7ksmHjM0JOodanL5-M1Qs1cOGKb4pT6nOz7TsF50O_Y5mwcLzFTlOSTrqpuPzNwho1AC5ZokKrUg3JS1GlbH-sRHfAvD91UjmwUK--AQbESDY94r_LXIIil3Y3aVPRGAe&u_aref=muWANYNZeB7WW%2B%2F7kLKv3uRT8ws%3D"; |
| | | // String content = HttpUtils.sendGet(sd); |
| | | HttpRequest get = HttpUtil.createGet("https://www.wjx.cn/xz/215122141.aspx?review=1&u_atoken=ae7fe80c-bf23-4e45-bcda-e1a79738fa28&u_asession=014vQCkRvW7c3i2ofZKQ4cwL_Kbllht28bP58Yv5WaOmib7bTm0yOu3E9ghHeDwRromXsXiqHPUHP-r4N4zkLsNdsq8AL43dpOnCClYrgFm6o&u_asig=05cv8a98zgMB5Y5sCj2tshR0ymwhcsf5o0ZnRdzw_MWEqjWv9qUXA2y-W56hqxwwu62XCEZ1EDfpROWbv7u5K0esMlHJrRxH3x6MobynAbRVK73TZbz-pPzJrSvkDKmiOfptL_k4wm25Oa44SC2LnI-f_gUMOBxNBqpVgCnR54boOBWxWY3t65iSXiQbR5_szmksmHjM0JOodanL5-M1Qs1XAPImYHnI2tMZUJbFSrHeeZf56fnMo9lJFDBHsYIUFUSQYCfCQCLXjf34sWIadxtsAb6Kn2CIzT3kClvRmyaM_Y94r_LXIIil3Y3aVPRGAe&u_aref=OUtfes0VUTyTV7SKqEByRa9VGaU%3D"); |
| | | String content = get.execute().body(); |
| | | //创建文件夹 |
| | | Path folder = Paths.get("E:/pc2/" + name + ".html"); |
| | | Files.createFile(folder); |
| | | Files.write(Paths.get("E:/pc2/" + name + ".html"), content.getBytes()); |
| | | } |
| | | // 创建 Matcher 对象 |
| | | Matcher matcher = pattern.matcher(sentence); |
| | | |
| | | |
| | | // int index = 0; |
| | | // while (true) { |
| | | // int dd = str.indexOf("¤", index) + 1; |
| | | // String newStr = str.substring(dd, str.indexOf("§", dd)); |
| | | // index = dd; |
| | | // String sd = "https://www.wjx.cn/xz/" + newStr + ".aspx"; |
| | | // String content = HttpUtils.sendGet(sd, "review=1"); |
| | | // |
| | | // //创建文件夹 |
| | | // Path folder = Paths.get("E:/pc/" + newStr + ".html"); |
| | | // Files.createFile(folder); |
| | | // Files.write(Paths.get("E:/pc/" + newStr + ".html"), content.getBytes()); |
| | | // |
| | | // if (index == -1) { |
| | | // break; |
| | | // } |
| | | // } |
| | | // break; |
| | | // } |
| | | |
| | | // } |
| | | |
| | | } catch (Exception e) { |
| | | // 进行匹配 |
| | | if (matcher.matches()) { |
| | | System.out.println("句子中包含\"吃了,早上好\"。"); |
| | | } else { |
| | | System.out.println("句子中不包含\"吃了,早上好\"。"); |
| | | } |
| | | // String str = null; |
| | | //// for (int i = 51; i <= 189; i++) { |
| | | //// System.out.print(i + " "); |
| | | // try { |
| | | // |
| | | //// long l = System.currentTimeMillis(); |
| | | ////// str = "2263œ189œœ232105105§胸部肿瘤科日间病房患者<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§11¤232334680§胸部肿瘤科患者出院<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§19¤232313714§胸部肿瘤科食管癌患者出院<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§20¤232294126§胸部肿瘤科肺癌患者出院<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§22¤232231895§TKA术后<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§16¤232349695§<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>1月§29¤232303115§高糖<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>工作培训试题§16¤232295017§肿瘤<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>登记培训测试题§15¤232233427§老年健康监测队列<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>培训测试§10¤232215217§24病区IBD患者<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>问卷(8.22-8.28)§20¤232169849§2023.8.25小金人俱乐部MDT门诊<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>§9¤232173705§<font color='#30a6f5'>随</font><font color='#30a6f5'>访</font>信息反馈表§6¤"; |
| | | ////// str = HttpUtils.sendGet("https://www.wjx.cn/handler/Search.ashx?input=%E9%9A%8F%E8%AE%BF&qc=&cp=" + i + "&nw=1&qt=1&accurate=0&t=" + l); |
| | | //// while (true) { |
| | | //// if (str.contains("<font")) { |
| | | //// str = str.substring(0, str.indexOf("<font")) + str.substring(str.indexOf("</font>") + 7); |
| | | //// } else { |
| | | ////// System.out.println(str); |
| | | // |
| | | // List<String> list = new ArrayList<>(); |
| | | // String folderPath = "E:/pc"; |
| | | // try { |
| | | // Files.walk(Paths.get(folderPath)).filter(Files::isRegularFile).forEach(path -> list.add(path.getFileName().toString())); |
| | | // } catch (IOException e) { |
| | | // System.out.println("An error occurred while traversing the folder."); |
| | | // e.printStackTrace(); |
| | | // } |
| | | // |
| | | // for (String name : list) { |
| | | // name = name.substring(0, name.indexOf(".html")); |
| | | //// String sd = "https://www.wjx.cn/xz/" + name + ".aspx"; |
| | | //// String content = HttpUtils.sendGet(sd, "review=1&u_atoken=31d3a0e6-e00e-483c-b956-b6177972e734&u_asession=01691hXKO9p1D59fHgd0NAIGdr-8HrPakCUS6X_v_Z43ohuMPPQJnwrrPl3mly-UFXYrO71Pz35K7wOrV6v8t3g9sq8AL43dpOnCClYrgFm6o&u_asig=052KZ_HPijdQddSitCTcvhNlwy7cGO2t46JgQGTyclFPT7xwZjoK0YBNED-DbuN5gugFYO36lFqDfxMpnRFufeLFk7BSuILL02zWW0QNCM-NWgLElQ9wTw7yMg4BFlXpIhOVzkXl1W45ZWoBPny57uZ3WVmTrcdvNBGM1L1rWev0UwpA9oQuqX4mfRyWspmK8gksmHjM0JOodanL5-M1Qs1Srh04GpT7zItRH0TxDThMR15LDAE7pTBPfvFUyoAgUl5gGKGQH-vEmN2Ae1t4GLLQkS6fjps_twKYxR-u2135jY94r_LXIIil3Y3aVPRGAe&u_aref=gk2k7DQa4vfdgaFwA4iVd7KUI4Q%3D", Constants.UTF8); |
| | | //// String sd = "https://www.wjx.cn/xz/214877887.aspx?review=1&u_atoken=7ea94b7e-ff2c-4f9f-a16a-2377e7b932f7&u_asession=01mUmaPv86XmI35R93EFzW4vAn-_bjd_hg8O3aV3Qj_hY4LTkzoq3vcQyaBAt_bXG0L3UvT1OC8yhlQ_Vrlj1nD9sq8AL43dpOnCClYrgFm6o&u_asig=05qqfmDpV5jnzQ3zaOR-kKvhCvox19kVoC-lC8wI68PnJ7lkckawOfhM2Sz6TSlLyxhPCLiM-UkS5JtWfnNrZeTMme2Xl4_nh34yljc8iiZeJybY6mcSFGRriqnFvQfUeuCvV5odDJuWRnGyKMRbx_RypafWqYj328He90miD2j6an7QQBtuwLgtjfNI6ktmk7ksmHjM0JOodanL5-M1Qs1cOGKb4pT6nOz7TsF50O_Y5mwcLzFTlOSTrqpuPzNwho1AC5ZokKrUg3JS1GlbH-sRHfAvD91UjmwUK--AQbESDY94r_LXIIil3Y3aVPRGAe&u_aref=muWANYNZeB7WW%2B%2F7kLKv3uRT8ws%3D"; |
| | | //// String content = HttpUtils.sendGet(sd); |
| | | // HttpRequest get = HttpUtil.createGet("https://www.wjx.cn/xz/215122141.aspx?review=1&u_atoken=ae7fe80c-bf23-4e45-bcda-e1a79738fa28&u_asession=014vQCkRvW7c3i2ofZKQ4cwL_Kbllht28bP58Yv5WaOmib7bTm0yOu3E9ghHeDwRromXsXiqHPUHP-r4N4zkLsNdsq8AL43dpOnCClYrgFm6o&u_asig=05cv8a98zgMB5Y5sCj2tshR0ymwhcsf5o0ZnRdzw_MWEqjWv9qUXA2y-W56hqxwwu62XCEZ1EDfpROWbv7u5K0esMlHJrRxH3x6MobynAbRVK73TZbz-pPzJrSvkDKmiOfptL_k4wm25Oa44SC2LnI-f_gUMOBxNBqpVgCnR54boOBWxWY3t65iSXiQbR5_szmksmHjM0JOodanL5-M1Qs1XAPImYHnI2tMZUJbFSrHeeZf56fnMo9lJFDBHsYIUFUSQYCfCQCLXjf34sWIadxtsAb6Kn2CIzT3kClvRmyaM_Y94r_LXIIil3Y3aVPRGAe&u_aref=OUtfes0VUTyTV7SKqEByRa9VGaU%3D"); |
| | | // String content = get.execute().body(); |
| | | // //创建文件夹 |
| | | // Path folder = Paths.get("E:/pc2/" + name + ".html"); |
| | | // Files.createFile(folder); |
| | | // Files.write(Paths.get("E:/pc2/" + name + ".html"), content.getBytes()); |
| | | // } |
| | | // |
| | | // |
| | | //// int index = 0; |
| | | //// while (true) { |
| | | //// int dd = str.indexOf("¤", index) + 1; |
| | | //// String newStr = str.substring(dd, str.indexOf("§", dd)); |
| | | //// index = dd; |
| | | //// String sd = "https://www.wjx.cn/xz/" + newStr + ".aspx"; |
| | | //// String content = HttpUtils.sendGet(sd, "review=1"); |
| | | //// |
| | | //// //创建文件夹 |
| | | //// Path folder = Paths.get("E:/pc/" + newStr + ".html"); |
| | | //// Files.createFile(folder); |
| | | //// Files.write(Paths.get("E:/pc/" + newStr + ".html"), content.getBytes()); |
| | | //// |
| | | //// if (index == -1) { |
| | | //// break; |
| | | //// } |
| | | //// } |
| | | //// break; |
| | | //// } |
| | | // |
| | | //// } |
| | | // |
| | | // } catch (Exception e) { |
| | | // } |
| | | } |
| | | } |
| | | //} |
| | | |