Wrote program for Day 61
parent 7ee015e03b
commit 6b024d00bd
@@ -107,7 +107,7 @@ We encourage you to share your progress and ask questions in the Discussions sec
| Day #58 | [Create A Dice Roller](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-58_Create-A-Dice-Roller) | :white_check_mark: |
| Day #59 | [Perfectly Balanced](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-59_Perfectly-Balanced) | :white_check_mark: |
| Day #60 | [A Game Of Threes](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-60_A-Game-Of-Thrones) | :white_check_mark: |
-| Day #61 | [Write A Web Crawler](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-61_Write-A-Web-Crawler) | :white_large_square: |
+| Day #61 | [Write A Web Crawler](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-61_Write-A-Web-Crawler) | :white_check_mark: |
| Day #62 | [Funny Plant](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-62_Funny-Plant) | :white_large_square: |
| Day #63 | [The Rabbit Problem](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-63_The-Rabbit-Problem) | :white_large_square: |
| Day #64 | [First Recurring Character](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-10/Day-64_First-Recurring-Character) | :white_large_square: |
Week-09/Day-61_Write-A-Web-Crawler/day61/Cargo.toml  (Normal file, 9 lines)
@@ -0,0 +1,9 @@
[package]
name = "day61"
version = "0.1.0"
edition = "2021"

[dependencies]
tokio = { version = "1.40.0", features = ["macros", "rt-multi-thread"] }
reqwest = "0.12.7"
scraper = "0.20.0"
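The two tokio features line up with how the binary starts its runtime: "macros" provides the `#[tokio::main]` attribute used in src/main.rs below, and "rt-multi-thread" provides the multi-threaded runtime that attribute builds. As a rough sketch (not part of this commit), the attribute does roughly the same work as the hand-written builder here:

// Rough, hand-written equivalent (not in the commit) of what #[tokio::main]
// sets up with the "rt-multi-thread" feature enabled: build a multi-threaded
// Tokio runtime and block the real main thread on the async body.
fn main() {
    tokio::runtime::Builder::new_multi_thread()
        .enable_all()
        .build()
        .expect("failed to build Tokio runtime")
        .block_on(async {
            // the async main body would go here
        });
}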
Week-09/Day-61_Write-A-Web-Crawler/day61/src/lib.rs  (Normal file, 43 lines)
@@ -0,0 +1,43 @@
/// Breadth-first crawl starting from `url`, following only absolute `https`
/// links, descending up to `max_depth` levels. Returns every URL visited.
pub async fn crawl(url: &str, max_depth: u8) -> Vec<String> {
    let client = reqwest::Client::new();

    let mut discovered_urls = Vec::new();
    let mut current_urls = vec![url.to_string()];
    let mut next_urls = Vec::new();

    for _ in 0..=max_depth {
        for url in current_urls.iter() {
            // Fetch the page body; skip URLs that fail to load.
            let response = match client.get(url).send().await {
                Ok(response) => response,
                Err(_) => {
                    continue;
                }
            }
            .text()
            .await
            .unwrap_or_default();

            // Collect every `href` and `src` attribute on the page.
            let document = scraper::Html::parse_document(&response);
            for element in document.select(&scraper::Selector::parse("[href],[src]").unwrap()) {
                let url = if element.value().attr("src").is_some() {
                    element.value().attr("src").unwrap()
                } else {
                    element.value().attr("href").unwrap()
                };
                // Queue absolute https links that have not been seen yet.
                if url.starts_with("https")
                    && !discovered_urls.contains(&url.to_string())
                    && !current_urls.contains(&url.to_string())
                    && !next_urls.contains(&url.to_string())
                {
                    next_urls.push(url.to_string());
                }
            }
        }

        // Move this level's URLs into the results and descend one level.
        discovered_urls.append(&mut current_urls);
        current_urls = next_urls;
        next_urls = Vec::new();
    }

    discovered_urls
}
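`crawl` keeps only attribute values that already start with "https", so relative links such as /about or ../index.html are discarded. If those were wanted as well, each raw href/src value could be resolved against the URL of the page it was found on before the dedup checks. A minimal sketch, assuming the `Url` type that reqwest re-exports; the `resolve_link` helper is hypothetical and not part of the commit:

use reqwest::Url;

// Hypothetical helper (not in the commit): join a possibly-relative link onto
// the page URL it came from, keeping the crawler's https-only restriction.
fn resolve_link(page_url: &str, link: &str) -> Option<String> {
    let base = Url::parse(page_url).ok()?;
    let absolute = base.join(link).ok()?;
    (absolute.scheme() == "https").then(|| absolute.to_string())
}

Inside the inner loop, `resolve_link(url, raw_value)` would then replace the raw attribute value before the `contains` checks.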
Week-09/Day-61_Write-A-Web-Crawler/day61/src/main.rs  (Normal file, 20 lines)
@@ -0,0 +1,20 @@
use std::io::{self, Write};

#[tokio::main]
async fn main() {
    let mut buffer = String::new();

    // Ask for the start URL.
    print!("Enter the URL to crawl: ");
    io::stdout().flush().unwrap();
    io::stdin().read_line(&mut buffer).unwrap();
    let url = buffer.trim().to_string();

    // Reuse the buffer to read the crawl depth.
    buffer.clear();
    print!("Enter the max depth: ");
    io::stdout().flush().unwrap();
    io::stdin().read_line(&mut buffer).unwrap();
    let max_depth = buffer.trim().parse().unwrap();

    // Crawl and print every URL that was reached.
    let discovered_urls = day61::crawl(&url, max_depth).await;
    println!("{:?}", discovered_urls);
}
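The depth is read with `parse().unwrap()`, so an empty or non-numeric answer aborts the program with a panic. A more forgiving prompt could retry until the input parses; the `prompt_u8` helper below is a hypothetical sketch using only the standard library, not part of the commit:

use std::io::{self, Write};

// Hypothetical helper (not in the commit): keep prompting until the input
// parses as a u8 instead of panicking on the first bad value.
fn prompt_u8(prompt: &str) -> u8 {
    loop {
        print!("{prompt}");
        io::stdout().flush().unwrap();
        let mut line = String::new();
        io::stdin().read_line(&mut line).unwrap();
        match line.trim().parse() {
            Ok(value) => return value,
            Err(_) => println!("Please enter a whole number between 0 and 255."),
        }
    }
}

With that helper, `let max_depth = prompt_u8("Enter the max depth: ");` would replace the second read-and-parse block in main.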