#!/bin/bash

#######################################
# Query trustedsource.org for the website categorization and reputation of
# one URL and store the raw HTTP response (response headers + body, because
# curl runs with -i) under the folder data/ for later analysis.
#
# The log file is data/<name>.log, where <name> is the URL with a leading
# http:// or https:// removed and every "/" replaced by "_", so the name is
# always a single path component (a URL with a path no longer points into a
# non-existent subdirectory).
#
# Arguments: $1 - URL to look up
# Outputs:   "Processing <url>" on stdout; diagnostics on stderr
# Returns:   0 on success, non-zero if data/ cannot be created or curl fails
#######################################
make_request() {
    local url=$1
    local boundary='----WebKitFormBoundary4cY4hZ8BHB6NUYQC'
    local name logfile body header status=0
    local -a headers header_args=()

    # Derive a flat, filesystem-safe file name from the URL.
    name=${url#https://}
    name=${name#http://}
    name=${name//\//_}
    logfile="data/${name}.log"

    # The output folder may not exist yet on a first run.
    if ! mkdir -p -- data; then
        echo "Cannot create output folder data/" >&2
        return 1
    fi

    # Multipart form body with three fields (action, product, url); every
    # line ends in CRLF. printf re-applies the format once per argument triple.
    printf -v body -- '--%s\r\nContent-Disposition: form-data; name="%s"\r\n\r\n%s\r\n' \
        "$boundary" 'action' 'checksingle' \
        "$boundary" 'product' '01-ts' \
        "$boundary" 'url' "$url"
    body+="--${boundary}--"$'\r\n'

    # Request headers, sent in this order.
    # NOTE(review): Accept-Encoding advertises gzip/deflate/br while curl is
    # not asked to decode (--compressed is not set), so the saved body may be
    # compressed if the server chooses to encode it — confirm.
    headers=(
        'Host: trustedsource.org'
        'Cache-Control: max-age=0'
        'Sec-Ch-Ua: "Not-A.Brand";v="99", "Chromium";v="124"'
        'Sec-Ch-Ua-Mobile: ?0'
        'Sec-Ch-Ua-Platform: "Windows"'
        'Upgrade-Insecure-Requests: 1'
        'Origin: https://trustedsource.org'
        "Content-Type: multipart/form-data; boundary=${boundary}"
        'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.60 Safari/537.36'
        'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7'
        'Sec-Fetch-Site: same-origin'
        'Sec-Fetch-Mode: navigate'
        'Sec-Fetch-User: ?1'
        'Sec-Fetch-Dest: document'
        'Referer: https://trustedsource.org/'
        'Accept-Encoding: gzip, deflate, br'
        'Accept-Language: de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7'
        'Priority: u=0, i'
    )
    for header in "${headers[@]}"; do
        header_args+=(-H "$header")
    done

    echo "Processing $url"

    # -k skips TLS certificate verification (kept from the original request);
    # -S makes curl report errors on stderr even though -s hides progress.
    # The cookies are presumably load-balancer cookies captured from a
    # browser session — TODO confirm they are still required.
    curl --path-as-is -i -s -S -k -X 'POST' \
        "${header_args[@]}" \
        -b 'AWSALB=eJToCwD6Pm4bWC/cFjFaxZM2VAXUdWYOgusSUjSGemhmm0XVx8qhOBlYebMxSJeEqgoxSLBD/7XW8OiGPkfNH8U9r5aNC0ff+1sACd/9RNW+JZAY+CYMsEqK2apk; AWSALBCORS=eJToCwD6Pm4bWC/cFjFaxZM2VAXUdWYOgusSUjSGemhmm0XVx8qhOBlYebMxSJeEqgoxSLBD/7XW8OiGPkfNH8U9r5aNC0ff+1sACd/9RNW+JZAY+CYMsEqK2apk' \
        --data-binary "$body" \
        'https://trustedsource.org/en/feedback/url' > "$logfile" || status=$?

    if [[ $status -ne 0 ]]; then
        echo "Request for $url failed (curl exit code $status)" >&2
        return "$status"
    fi
}

# Require exactly one argument: the file containing the list of URLs.
# Usage help is a diagnostic, so it goes to stderr.
if [[ $# -ne 1 ]]; then
    echo "Provide website categorization and reputation from a list of URLs using trustedsource.org" >&2
    echo "Usage: $0 <file with list of urls>" >&2
    exit 1
fi

# Input file containing URLs
input_file="$1"

# Fail early: otherwise the loop's redirection fails and the script would
# still report "Finished." without having processed anything.
if [[ -d "$input_file" || ! -r "$input_file" ]]; then
    echo "Cannot read input file: $input_file" >&2
    exit 1
fi

echo "Starting website categorization and reputation from file $input_file by using trustedsource.org:"
# Read file line by line; the `|| [[ -n "$line" ]]` part keeps a final line
# that is not terminated by a newline.
while IFS= read -r line || [[ -n "$line" ]]; do
    # Drop a trailing carriage return so files with CRLF line endings work
    line=${line%$'\r'}

    # Nothing to report for empty lines
    if [[ -z "$line" ]]; then
        continue
    fi

    # Only lines starting with http:// or https:// are looked up
    if [[ $line =~ ^https?:// ]]; then
        make_request "$line"
    else
        echo "Skipping line without http(s):// URL: $line"
    fi
done < "$input_file"
echo "Finished. Results are saved in subfolder data/."

