分享一款懶人必備的Python爬蟲神器
pip install pytest-playwright
playwright install
python -m playwright codegen xxx.com
import time

import numpy as np
from playwright.sync_api import Playwright, sync_playwright, expect

# Input spectra, one per CSV row (each row is later pasted into the site's
# "spectrum" textarea). ndmin=2 keeps the array two-dimensional even when the
# file holds a single row, so row-wise access keeps working.
data_test = np.loadtxt('./dist/1_res.csv', delimiter=',', ndmin=2)
def get_str(arr):
    """Render a sequence of numbers as text with one value per line.

    Args:
        arr: Iterable of numbers (for example one row of ``data_test``).

    Returns:
        A string in which every value is formatted with two decimal places
        and followed by a CR+LF line break, so a non-empty result ends with
        a trailing line break. An empty iterable yields an empty string.
    """
    # join() builds the result in one pass instead of repeated concatenation.
    return "".join(f"{value:.2f}\r\n" for value in arr)
# Module-level accumulator: run() appends one [L, a, b] list per processed
# row, and the collected rows are written to CSV at the end of the script.
labs=[]
def run(playwright: Playwright) -> None:
    """Submit every row of ``data_test`` to the qtccolor spectrum tool.

    Opens a visible Chromium window on the spectrum page, fills the form
    once (value "400" in the field whose placeholder is 380, "1964" in the
    ``obs`` dropdown, "D65" in the ``ill`` dropdown), then for each row of
    the module-level ``data_test`` pastes the formatted values into the
    ``spectrum`` textarea, clicks the convert button and reads three cells
    of the result table as floats. Each triple is printed and appended to
    the module-level ``labs`` list.

    Args:
        playwright: The handle yielded by ``sync_playwright()``.

    Raises:
        ValueError: If a result cell does not contain a parseable number.
    """
    # Second row of the result table; cells 2, 3 and 4 are read as L, a, b.
    lab_row = 'xpath=//*[@id="scroll_container"]/div[1]/div/div[2]/table/tbody/tr[2]'

    browser = playwright.chromium.launch(headless=False)
    try:
        context = browser.new_context()
        page = context.new_page()
        page.goto("https://www.qtccolor.com/secaiku/tool/spectrum")

        # NOTE(review): the Chinese selector texts in the scraped source were
        # corrupted (pinyin fragments such as "(shù)" spliced into the
        # literals), so they could never match. They are restored here to the
        # presumed simplified-Chinese originals -- confirm against the live page.
        page.locator('div[role="tab"]:has-text("光谱数据")').click()
        page.locator("text=最小波长:nm").click()
        # "\33 " is the CSS escape for the digit 3 (an attribute value written
        # this way by codegen), so this selector matches placeholder="380".
        page.locator('[placeholder="\\33 80"]').fill("400")
        page.locator('select[name="obs"]').select_option("1964")
        page.locator('select[name="ill"]').select_option("D65")

        # Locators are lazy, so they can be created once and reused per row.
        spectrum_box = page.locator('textarea[name="spectrum"]')
        convert_button = page.locator('button:has-text("转换颜色")')

        for row in data_test:
            spectrum_box.click()
            spectrum_box.press("Control+a")
            spectrum_box.fill(get_str(row))
            convert_button.click()
            # Give the page a moment to refresh the result table. Playwright's
            # own timer is used instead of time.sleep(), which blocks the
            # driver's event processing in the sync API.
            page.wait_for_timeout(1000)

            lab = [
                float(page.locator(f"{lab_row}/td[{col}]").inner_text())
                for col in (2, 3, 4)
            ]
            print(*lab)
            labs.append(lab)

        context.close()
    finally:
        # Always release the browser, even if a step above raised.
        browser.close()
# Start the Playwright driver, run the scraping session, and shut the driver
# down again when the block exits; run() fills the module-level ``labs`` list.
with sync_playwright() as playwright:
    run(playwright)

# Write the collected [L, a, b] rows as comma-separated values.
np.savetxt('./1_lab_res.csv', labs, delimiter=",")
大廠出品果然不同,使用它在不考慮運行效率(有異步但是我懶得看了)的情況下可以輕鬆實現複雜操作,懶人最愛!
來源:https://juejin.cn/post/7140542063061237773(侵刪)