I've recently been reworking our automated test-case generation platform and wanted to rebuild the front end with element-ui. Going through webpack for that felt like more trouble than it was worth, so I figured it would be easier to download the library and reference it locally. A quick search online turned up a Python script for this, but it cost quite a few C-coins (CSDN download credits); I have plenty of C-coins, but for a small job like this, why not do it myself, so I wrote my own script.
For how to reference the local copy of element-ui and how to use it afterwards, see my other blog post:
To use the script, you only need to change the version number to download and the directory where the downloaded files will be stored.
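Concretely, that means editing these two assignments near the top of the script (the values shown are just the ones used in this post):

element_ui_version = "2.13.0"   # element-ui version to download
element_ui_dir = "D:/tmp"       # local directory the files are saved under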
The full script is as follows:
# encoding: utf-8
from bs4 import BeautifulSoup
import requests, re, os, socket
from urllib import request

# version of element-ui to download
element_ui_version = "2.13.0"
# directory to store the downloaded files in
element_ui_dir = "D:/tmp"

save_ui_dir = os.path.join(element_ui_dir, "element-ui")
if not os.path.isdir(save_ui_dir):
    os.makedirs(save_ui_dir)

element_ui_url = "https://unpkg.com/browse/element-ui@" + element_ui_version + "/"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0"
}


def get_page(url, save_dir):
    """Crawl one unpkg directory listing, download its files and recurse into subdirectories."""
    print("Current Page: ", url)
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "lxml")
    tbody = soup.find("tbody")
    # pull every href out of the listing table
    rule_name = r'href="(.+?)"'
    td_href = re.findall(rule_name, str(tbody))
    dir_list = []
    for href in td_href:
        href_path = os.path.join(save_dir, href)
        if href == "../":        # link back to the parent directory, skip it
            pass
        elif "/" in href:        # entries ending with "/" are subdirectories
            os.mkdir(href_path)
            print("Makedir: ", href_path.replace(save_ui_dir, ""))
            dir_list.append(href)
        else:                    # plain file entry
            file_url = url + href
            abs_name = file_url.replace(element_ui_url, "")
            print("Download: ", abs_name)
            get_file(file_url, href_path)
    # recurse into the subdirectories found on this page
    for sub_dir in dir_list:
        sub_url = url + sub_dir
        sub_dir = os.path.join(save_dir, sub_dir)
        get_page(sub_url, sub_dir)


def get_file(url, filename):
    """Download a single file, retrying up to 5 times on timeout or other errors."""
    opener = request.build_opener()
    opener.addheaders = [('User-agent',
                          'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0')]
    request.install_opener(opener)
    socket.setdefaulttimeout(30)
    # the raw file lives at the same path without the "browse/" segment
    url = url.replace("browse/", "")
    count = 1
    while count <= 5:
        try:
            request.urlretrieve(url, filename)
            break
        except socket.timeout:
            print('<Timeout> Reloading for %d time' % count)
            count += 1
        except Exception as e:
            print('<' + str(e) + '> Reloading for %d time' % count)
            count += 1
    if count > 5:
        print("<Error> download job failed!")


get_page(element_ui_url, save_ui_dir)
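One detail worth calling out in get_file: unpkg serves the directory listing under a /browse/ path, while the raw file content sits at the same path without it, which is why the script strips "browse/" from the URL before downloading. A quick illustration of that mapping (lib/index.js is just an example file from the package):

listing_url = "https://unpkg.com/browse/element-ui@2.13.0/lib/index.js"
raw_url = listing_url.replace("browse/", "")
# raw_url is "https://unpkg.com/element-ui@2.13.0/lib/index.js", which returns the file itself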
There is also a Java version I found online; it works in essentially the same way: fetch the directory listing page, then pull out the href links.
To use it: create a new text file, rename it to xxx.java, adjust the version number and storage path inside, and run java xxx.java from cmd (this single-file launch requires JDK 11 or later; on older JDKs compile with javac first and then run the class).
The code is as follows:
package com.ycr;

import java.io.File;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;

public class Main {
    // local directory to save into
    static String fileP = "D:\\tmp\\element-ui\\";
    // directory listing pages
    static String urlP = "https://unpkg.com/browse/element-ui@2.13.0/";
    // raw file downloads (same path without "browse/")
    static String urlF = "https://unpkg.com/element-ui@2.13.0/";

    public static void main(String[] args) {
        try {
            GetPage("");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    // fetch one directory listing page, download its files and recurse into subdirectories
    static void GetPage(String after) throws Exception {
        System.out.println(urlP + after);
        new File(fileP + after).mkdir();
        HttpURLConnection http = (HttpURLConnection) (new URL(urlP + after)).openConnection();
        http.setRequestMethod("GET");
        http.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3562.0 Safari/537.36");
        http.connect();
        if (http.getResponseCode() == 200) {
            // read the whole response body into a byte array
            InputStream inputStream = http.getInputStream();
            byte[] buffer = new byte[1024];
            ArrayList<byte[]> byteList = new ArrayList<>();
            ArrayList<Integer> byteLength = new ArrayList<>();
            int length;
            int totalLength = 0;
            while ((length = inputStream.read(buffer)) != -1) {
                byteList.add(buffer);
                byteLength.add(length);
                totalLength += length;
                buffer = new byte[1024];
            }
            http.disconnect();
            byte[] all = new byte[totalLength];
            totalLength = 0;
            while (byteList.size() != 0) {
                System.arraycopy(byteList.get(0), 0, all, totalLength, byteLength.get(0));
                totalLength += byteLength.get(0);
                byteList.remove(0);
                byteLength.remove(0);
            }
            String content = new String(all, StandardCharsets.UTF_8);
            all = null;
            // keep only the listing's table body and split out every href value
            content = content.split("tbody")[1];
            String[] us = content.split("href=\"");
            for (int i = 1; i < us.length; i++) {
                String href = us[i].split("\"", 2)[0];
                if (href.equals("../")) {      // parent-directory link, skip
                    continue;
                }
                if (href.charAt(href.length() - 1) == '/') {
                    GetPage(after + href);     // subdirectory: recurse
                } else {
                    GetFile(after + href);     // plain file: download
                }
            }
        } else {
            // non-200 response: retry the same page
            GetPage(after);
        }
    }

    // download a single raw file and write it under fileP
    static void GetFile(String url) throws Exception {
        System.out.println(url);
        HttpURLConnection http = (HttpURLConnection) (new URL(urlF + url)).openConnection();
        http.setRequestMethod("GET");
        http.setRequestProperty("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3562.0 Safari/537.36");
        http.connect();
        if (http.getResponseCode() == 200) {
            // read the whole response body into a byte array
            InputStream inputStream = http.getInputStream();
            byte[] buffer = new byte[1024];
            ArrayList<byte[]> byteList = new ArrayList<>();
            ArrayList<Integer> byteLength = new ArrayList<>();
            int length;
            int totalLength = 0;
            while ((length = inputStream.read(buffer)) != -1) {
                byteList.add(buffer);
                byteLength.add(length);
                totalLength += length;
                buffer = new byte[1024];
            }
            http.disconnect();
            byte[] all = new byte[totalLength];
            totalLength = 0;
            while (byteList.size() != 0) {
                System.arraycopy(byteList.get(0), 0, all, totalLength, byteLength.get(0));
                totalLength += byteLength.get(0);
                byteList.remove(0);
                byteLength.remove(0);
            }
            // write the bytes to the matching local path
            File f = new File(fileP + url.replaceAll("/", "\\\\"));
            f.createNewFile();
            FileOutputStream fos = new FileOutputStream(f, false);
            fos.write(all);
            fos.flush();
            fos.close();
        } else {
            // non-200 response: retry the same file
            GetFile(url);
        }
    }
}
That said, since the site's servers are located in the US, downloads are fairly slow. It also seems to have some anti-crawling protection: downloading repeatedly in quick succession triggers a temporary restriction, although it lifts again quite quickly.
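If you hit that restriction often, one simple mitigation is to pause briefly between requests. The sketch below is only an illustration of the idea and is not part of either script above; the helper name polite_get and the delay value are assumptions you can adapt:

import time
import requests

REQUEST_DELAY_SECONDS = 0.5   # assumed delay between requests; tune to taste

def polite_get(url, headers):
    """Sleep briefly before each request so the server is hit less aggressively."""
    time.sleep(REQUEST_DELAY_SECONDS)
    return requests.get(url, headers=headers, timeout=30)

# e.g. in get_page above, call polite_get(url, headers) instead of requests.get(url, headers=headers)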