using HtmlAgilityPack;
using OpenQA.Selenium;
using OpenQA.Selenium.Chrome;
using OpenQA.Selenium.PhantomJS;
namespace Christopher.Snay.Sample.Services.Scrapers
{
/// <summary>
/// Scraper that opens a URL in a Selenium-driven Chrome browser and returns the page source
/// reported by the driver as an HtmlAgilityPack <see cref="HtmlDocument"/>.
/// </summary>
internal class ChromeScraper : IChromeScraper
{
    /// <summary>
    /// Scrapes the page at <paramref name="url"/>.
    /// </summary>
    /// <param name="url">Address of the page to load.</param>
    /// <returns>The parsed page source.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is <c>null</c>.</exception>
    public HtmlDocument ScrapeHtml(Uri url)
    {
        ArgumentNullException.ThrowIfNull(url);

        // Uri.ToString() yields the *unescaped* canonical form, which can decode percent-escapes
        // (e.g. "%26" -> "&") and so change the URL that is actually requested. AbsoluteUri keeps
        // the escaped form; it throws for relative URIs, so those keep the previous ToString() path.
        string address = url.IsAbsoluteUri ? url.AbsoluteUri : url.ToString();
        return ScrapeHtml(address);
    }

    /// <summary>
    /// Scrapes the page at <paramref name="url"/>.
    /// </summary>
    /// <param name="url">Address of the page to load.</param>
    /// <returns>The parsed page source.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException"><paramref name="url"/> is empty or whitespace.</exception>
    public HtmlDocument ScrapeHtml(string url)
    {
        // Fail fast on unusable input, before any driver process is launched.
        ArgumentNullException.ThrowIfNull(url);
        if (string.IsNullOrWhiteSpace(url))
        {
            throw new ArgumentException("The URL must not be empty or whitespace.", nameof(url));
        }

        HtmlDocument doc = new();

        // NOTE(review): this PhantomJS service is configured and started, but it is never passed
        // to a driver - the page is loaded by the ChromeDriver created below. Confirm whether it
        // is still needed; the settings on it do not appear to reach the Chrome session.
        using (PhantomJSDriverService driverService = PhantomJSDriverService.CreateDefaultService())
        {
            driverService.HideCommandPromptWindow = true;
            driverService.LoadImages = false;
            driverService.IgnoreSslErrors = true;
            driverService.Start();

            // Disposed at the end of this scope, i.e. before the PhantomJS service above.
            using IWebDriver driver = new ChromeDriver();
            driver.Navigate().GoToUrl(url);

            // Fixed 3-second pause before reading the source
            // (presumably to let client-side scripts finish rendering - confirm).
            Thread.Sleep(3000);

            doc.LoadHtml(driver.PageSource);
        }

        return doc;
    }
}
/// <summary>
/// Contract for a scraper that loads a page from a URL and returns it as an
/// HtmlAgilityPack <see cref="HtmlDocument"/>.
/// </summary>
public interface IChromeScraper
{
    /// <summary>
    /// Loads the page identified by <paramref name="url"/>.
    /// </summary>
    /// <param name="url">Address of the page, as a <see cref="Uri"/>.</param>
    /// <returns>The loaded page as an <see cref="HtmlDocument"/>.</returns>
    HtmlDocument ScrapeHtml(Uri url);

    /// <summary>
    /// Loads the page identified by <paramref name="url"/>.
    /// </summary>
    /// <param name="url">Address of the page, as a string.</param>
    /// <returns>The loaded page as an <see cref="HtmlDocument"/>.</returns>
    HtmlDocument ScrapeHtml(string url);
}
}
Requirements – The following executables must be in /bin
- phantomjs.exe
- chrome.exe
- chromedriver.exe
- the Chrome portable binaries directory (currently named \107.0.5304.88)
<!--
  Copies the executables from the Assets folder directly into the build output directory.
  Each TargetPath below is a bare file name, so the file is not placed in a sub-folder.
  PreserveNewest copies a file only when the source is newer than the existing output copy.
-->
<ItemGroup>
<!-- PhantomJS executable -->
<ContentWithTargetPath Include="Assets\phantomjs.exe">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<TargetPath>phantomjs.exe</TargetPath>
</ContentWithTargetPath>
<!-- Chrome browser executable -->
<ContentWithTargetPath Include="Assets\chrome.exe">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<TargetPath>chrome.exe</TargetPath>
</ContentWithTargetPath>
<!-- ChromeDriver executable -->
<ContentWithTargetPath Include="Assets\chromedriver.exe">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<TargetPath>chromedriver.exe</TargetPath>
</ContentWithTargetPath>
<!--
  NOTE(review): the same three files are also declared as None items; presumably this keeps
  them visible in the IDE's project tree - confirm before removing.
-->
<None Include="Assets\phantomjs.exe" />
<None Include="Assets\chrome.exe" />
<None Include="Assets\chromedriver.exe" />
</ItemGroup>
I’ve used Chrome portable to avoid having to install Chrome. If Chrome is installed, the chrome.exe steps can probably be skipped.