# ProTransparenz / lobbywatch

import polars as pl
import os


def transform_data(df: pl.DataFrame) -> pl.DataFrame:
    """
    Cast 'Betrag' to a float column and drop rows without a usable amount

    Args:
        df: Input DataFrame with the original structure; it must contain a
            'Betrag' column that can be cast to Float32

    Returns:
        A new DataFrame in which 'Betrag' is Float32 and only rows whose
        'Betrag' is neither null nor 0 are kept
    """
    # NOTE(review): Float32 carries only about 7 significant digits, so large
    # amounts with decimals may be rounded by this cast. Float64 would avoid
    # that but changes the returned schema -- confirm before switching.
    df_cleaned = df.cast({"Betrag": pl.Float32})

    amount = pl.col("Betrag")

    # The null test has to be a polars expression. The Python identity check
    # `pl.col("Betrag") is not None` is evaluated immediately to the constant
    # True and never inspects the column's values.
    return df_cleaned.filter(amount.is_not_null() & (amount != 0.0))


def process_file(input_file: str, inplace: bool = True) -> None:
    """
    Read one CSV file, run it through `transform_data` and write the result

    Args:
        input_file: Path to the input CSV file
        inplace: When True (the default) the input file itself is
            overwritten; otherwise the result is written next to it as
            "<name>_clean<ext>"

    Any exception raised while reading, transforming or writing is caught
    and reported on stdout; nothing is re-raised.
    """
    # Decide where the cleaned data goes
    if not inplace:
        stem, suffix = os.path.splitext(input_file)
        destination = f"{stem}_clean{suffix}"
    else:
        destination = input_file

    print(f"Processing {input_file}...")

    try:
        # Load, clean and store in a single chain
        transform_data(pl.read_csv(input_file)).write_csv(destination)
        print(f"Successfully saved cleaned data to {destination}")
    except Exception as err:
        print(f"Error processing {input_file}: {err!s}")

def main():
    """Run `process_file` with its default options on each hard-coded CSV file."""
    # The file names are passed to `process_file` exactly as written here.
    for csv_name in (
        "nat.csv",
        "juristische.csv",
        "afd_nat.csv",
        "afd_juristische.csv",
    ):
        process_file(csv_name)


# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()