Wrote program for Day 61

Mariano Riefolo 2024-09-24 10:09:05 +02:00
parent 7ee015e03b
commit 6b024d00bd
4 changed files with 73 additions and 1 deletion

View File

@@ -107,7 +107,7 @@ We encourage you to share your progress and ask questions in the Discussions sec
| Day #58 | [Create A Dice Roller](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-58_Create-A-Dice-Roller) | :white_check_mark: |
| Day #59 | [Perfectly Balanced](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-59_Perfectly-Balanced) | :white_check_mark: |
| Day #60 | [A Game Of Threes](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-60_A-Game-Of-Thrones) | :white_check_mark: |
| Day #61 | [Write A Web Crawler](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-61_Write-A-Web-Crawler) | :white_large_square: |
| Day #61 | [Write A Web Crawler](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-61_Write-A-Web-Crawler) | :white_check_mark: |
| Day #62 | [Funny Plant](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-62_Funny-Plant) | :white_large_square: |
| Day #63 | [The Rabbit Problem](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-63_The-Rabbit-Problem) | :white_large_square: |
| Day #64 | [First Recurring Character](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-10/Day-64_First-Recurring-Character) | :white_large_square: |

View File

@@ -0,0 +1,9 @@
[package]
name = "day61"
version = "0.1.0"
edition = "2021"

[dependencies]
tokio = { version = "1.40.0", features = ["macros", "rt-multi-thread"] }
reqwest = "0.12.7"
scraper = "0.20.0"

View File

@@ -0,0 +1,43 @@
/// Breadth-first web crawler: starting from `url`, follows absolute `https`
/// links found in `href` and `src` attributes, up to `max_depth` levels deep,
/// and returns every URL that was actually fetched.
pub async fn crawl(url: &str, max_depth: u8) -> Vec<String> {
    let client = reqwest::Client::new();
    let mut discovered_urls = Vec::new();
    let mut current_urls = vec![url.to_string()];
    let mut next_urls = Vec::new();

    for _ in 0..=max_depth {
        for url in current_urls.iter() {
            // Fetch the page, skipping URLs that fail to respond.
            let response = match client.get(url).send().await {
                Ok(response) => response,
                Err(_) => continue,
            }
            .text()
            .await
            .unwrap_or_default();

            // Visit every element carrying an `href` or `src` attribute.
            let document = scraper::Html::parse_document(&response);
            let selector = scraper::Selector::parse("[href],[src]").unwrap();
            for element in document.select(&selector) {
                let url = element
                    .value()
                    .attr("src")
                    .or_else(|| element.value().attr("href"))
                    .unwrap();

                // Queue only absolute https links that have not been seen yet.
                if url.starts_with("https")
                    && !discovered_urls.contains(&url.to_string())
                    && !current_urls.contains(&url.to_string())
                    && !next_urls.contains(&url.to_string())
                {
                    next_urls.push(url.to_string());
                }
            }
        }

        // This level is done; record it and move on to the next one.
        discovered_urls.append(&mut current_urls);
        current_urls = next_urls;
        next_urls = Vec::new();
    }

    discovered_urls
}
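
The crawl is breadth-first: current_urls holds the level currently being fetched, next_urls collects the links discovered at that level, and each completed level is appended to discovered_urls before descending. A minimal sketch of calling it directly, assuming a hypothetical seed URL and a depth of 1:

#[tokio::main]
async fn main() {
    // Hypothetical seed URL; any reachable https page would do.
    let urls = day61::crawl("https://www.rust-lang.org", 1).await;
    println!("Crawled {} URLs", urls.len());
}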

View File

@@ -0,0 +1,20 @@
use std::io::{self, Write};

#[tokio::main]
async fn main() {
    let mut buffer = String::new();

    // Read the seed URL from stdin.
    print!("Enter the URL to crawl: ");
    io::stdout().flush().unwrap();
    io::stdin().read_line(&mut buffer).unwrap();
    let url = buffer.trim().to_string();
    buffer.clear();

    // Read the maximum crawl depth (parsed as u8, the type `crawl` expects).
    print!("Enter the max depth: ");
    io::stdout().flush().unwrap();
    io::stdin().read_line(&mut buffer).unwrap();
    let max_depth = buffer.trim().parse().unwrap();

    let discovered_urls = day61::crawl(&url, max_depth).await;
    println!("{:?}", discovered_urls);
}
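
To try it, run cargo run inside the day61 crate and answer the two prompts; a non-numeric depth makes the unwrap on parse panic, and the printed list depends entirely on the pages that are live at crawl time.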