#!/usr/bin/python3
# Create a CSV file with the gTLD entries: the rows of the full list whose first field does not appear in the ccTLD list.

import csv

def read_csv_to_dict(file_path):
    """Load a CSV file into a dictionary keyed by each row's first field.

    Blank lines in the file are ignored.

    Args:
        file_path: Path of the UTF-8 encoded CSV file to read.

    Returns:
        A dict mapping the first field of every non-empty row to the full
        row (a list of strings). When several rows share the same first
        field, the later row replaces the earlier one.
    """
    with open(file_path, mode='r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        # csv.reader yields an empty list for a blank line (for example a
        # stray empty line at the end of the file); skip those rows instead
        # of raising IndexError on row[0].
        data = {row[0]: row for row in reader if row}
    return data

def write_dict_to_csv(data, file_path):
    """Write every value of ``data`` as one CSV row to ``file_path``.

    Args:
        data: Dict whose values are the rows (sequences of fields) to write;
            the keys are not written.
        file_path: Path of the UTF-8 encoded CSV file to create or overwrite.
    """
    with open(file_path, mode='w', newline='', encoding='utf-8') as out_file:
        # Rows are emitted in the dictionary's iteration order.
        csv.writer(out_file).writerows(data.values())

def remove_lines_by_first_field(file1_path, file2_path, output_file_path):
    """Copy the rows of one CSV file that have no counterpart in another.

    Two rows count as matching when their first fields are equal.

    Args:
        file1_path: CSV file whose rows are filtered.
        file2_path: CSV file whose first fields mark the rows to drop.
        output_file_path: CSV file that receives the remaining rows.
    """
    # Both inputs are indexed by their first field.
    source_rows = read_csv_to_dict(file1_path)
    excluded_rows = read_csv_to_dict(file2_path)

    # Keep only the rows of the first file whose key is absent from the second.
    kept_rows = {}
    for first_field, row in source_rows.items():
        if first_field in excluded_rows:
            continue
        kept_rows[first_field] = row

    # Persist the surviving rows.
    write_dict_to_csv(kept_rows, output_file_path)

def main():
    """Filter the hard-coded input list and print a short summary."""
    # Full list, the list of rows to drop from it, and the destination file.
    file1_path = 'tranco_V99PN.csv'
    file2_path = 'tranco_V99PN_ccTLDs.csv'
    output_file_path = 'tranco_V99PN_gTLDs.csv'

    # Write every row of the first file that is not keyed in the second one.
    remove_lines_by_first_field(
        file1_path=file1_path,
        file2_path=file2_path,
        output_file_path=output_file_path,
    )

    # Report what was processed and where the result went.
    print(f"Processed '{file1_path}' and removed lines present in '{file2_path}'.")
    print(f"Result saved to '{output_file_path}'.")

# Run the filtering only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()

