Wrote program for Day 61
This commit is contained in:
parent 7ee015e03b · commit 6b024d00bd
```diff
@@ -107,7 +107,7 @@ We encourage you to share your progress and ask questions in the Discussions sec
 | Day #58 | [Create A Dice Roller](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-58_Create-A-Dice-Roller) | :white_check_mark: |
 | Day #59 | [Perfectly Balanced](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-59_Perfectly-Balanced) | :white_check_mark: |
 | Day #60 | [A Game Of Threes](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-60_A-Game-Of-Thrones) | :white_check_mark: |
-| Day #61 | [Write A Web Crawler](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-61_Write-A-Web-Crawler) | :white_large_square: |
+| Day #61 | [Write A Web Crawler](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-61_Write-A-Web-Crawler) | :white_check_mark: |
 | Day #62 | [Funny Plant](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-62_Funny-Plant) | :white_large_square: |
 | Day #63 | [The Rabbit Problem](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-09/Day-63_The-Rabbit-Problem) | :white_large_square: |
 | Day #64 | [First Recurring Character](https://github.com/LiveGray/100-Days-Of-Rust/tree/main/Week-10/Day-64_First-Recurring-Character) | :white_large_square: |
```
Week-09/Day-61_Write-A-Web-Crawler/day61/Cargo.toml (new file)

```toml
[package]
name = "day61"
version = "0.1.0"
edition = "2021"

[dependencies]
tokio = { version = "1.40.0", features = ["macros", "rt-multi-thread"] }
reqwest = "0.12.7"
scraper = "0.20.0"
```
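For context: `tokio`'s `macros` feature supplies the `#[tokio::main]` attribute used in `src/main.rs`, and `rt-multi-thread` provides the runtime it starts by default; `reqwest` issues the HTTP requests and `scraper` parses the fetched HTML with CSS selectors.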
Week-09/Day-61_Write-A-Web-Crawler/day61/src/lib.rs (new file)

```rust
/// Breadth-first crawl starting from `url`, following links for
/// `max_depth` additional levels, returning every URL visited.
pub async fn crawl(url: &str, max_depth: u8) -> Vec<String> {
    let client = reqwest::Client::new();

    // URLs from depths that have already been crawled.
    let mut discovered_urls = Vec::new();
    // The frontier being crawled at the current depth.
    let mut current_urls = vec![url.to_string()];
    // Links found at the current depth, crawled on the next pass.
    let mut next_urls = Vec::new();

    for _ in 0..=max_depth {
        for url in current_urls.iter() {
            // Fetch the page; skip URLs that fail to resolve.
            let response = match client.get(url).send().await {
                Ok(response) => response,
                Err(_) => continue,
            };
            // Fall back to an empty body if decoding fails.
            let body = response.text().await.unwrap_or_default();

            // Collect every `href` and `src` attribute in the document.
            let document = scraper::Html::parse_document(&body);
            for element in document.select(&scraper::Selector::parse("[href],[src]").unwrap()) {
                let url = if element.value().attr("src").is_some() {
                    element.value().attr("src").unwrap()
                } else {
                    element.value().attr("href").unwrap()
                };
                // Keep only absolute https links we haven't seen yet.
                if url.starts_with("https")
                    && !discovered_urls.contains(&url.to_string())
                    && !current_urls.contains(&url.to_string())
                    && !next_urls.contains(&url.to_string())
                {
                    next_urls.push(url.to_string());
                }
            }
        }

        // Promote this depth's frontier to "discovered" and advance.
        discovered_urls.append(&mut current_urls);
        current_urls = next_urls;
        next_urls = Vec::new();
    }

    discovered_urls
}
```
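One way to sanity-check the depth bookkeeping is a small integration test; this is a hypothetical sketch, not part of the commit, and the file name, test name, and URL are illustrative. With `max_depth = 0`, only the seed URL is ever promoted into `discovered_urls`, so the assertion holds whether or not the request succeeds:

```rust
// tests/crawl.rs — hypothetical test file, not included in this commit.
#[tokio::test]
async fn crawl_depth_zero_returns_only_the_seed() {
    // The loop body runs once: links found on the page land in the
    // next frontier, but only the seed is appended to the result.
    let urls = day61::crawl("https://www.rust-lang.org", 0).await;
    assert_eq!(urls, vec!["https://www.rust-lang.org".to_string()]);
}
```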
Week-09/Day-61_Write-A-Web-Crawler/day61/src/main.rs (new file)

```rust
use std::io::{self, Write};

#[tokio::main]
async fn main() {
    let mut buffer = String::new();

    // Prompt for the starting URL.
    print!("Enter the URL to crawl: ");
    io::stdout().flush().unwrap();
    io::stdin().read_line(&mut buffer).unwrap();
    let url = buffer.trim().to_string();

    // Reuse the buffer to read the crawl depth (parsed as the u8
    // expected by `day61::crawl`).
    buffer.clear();
    print!("Enter the max depth: ");
    io::stdout().flush().unwrap();
    io::stdin().read_line(&mut buffer).unwrap();
    let max_depth = buffer.trim().parse().unwrap();

    let discovered_urls = day61::crawl(&url, max_depth).await;
    println!("{:?}", discovered_urls);
}
```
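For reference, a possible session; the URL and depth typed at the prompts are illustrative, and the printed list depends on the live page:

```console
$ cargo run
Enter the URL to crawl: https://www.rust-lang.org
Enter the max depth: 1
```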