#!/usr/bin/env nix-shell
#! nix-shell -i bash -p gnugrep gnused coreutils gawk
#
# PayPal SOAP Log Parser - Structured Output
#
# Reads a log file containing "PPAPIService: Request: ..." and
# "PPAPIService: Response: ..." lines, pairs the N-th request with the N-th
# response, and prints one pipe-separated record per request. With -s it
# prints summary statistics instead.
#
# Usage: ./parse_paypal.sh [OPTIONS] LOGFILE
#
# NOTE(review): records contain card data (card digits, expiry, CVV) copied
# from the log; treat the output as sensitive.
# NOTE(review): field values are emitted verbatim; a value containing '|'
# would shift the columns of its record.

# SOAP element names looked up in the request / response payloads.
# NOTE(review): these names are assumed from PayPal's DoDirectPayment SOAP
# schema -- confirm against a real log sample and adjust them here if the
# logged payloads use different elements.
readonly TAG_ORDER_TOTAL='OrderTotal'
readonly TAG_FIRST_NAME='FirstName'
readonly TAG_LAST_NAME='LastName'
readonly TAG_STREET='Street1'
readonly TAG_CITY='CityName'
readonly TAG_STATE='StateOrProvince'
readonly TAG_ZIP='PostalCode'
readonly TAG_CARD_TYPE='CreditCardType'
readonly TAG_CARD_NUMBER='CreditCardNumber'
readonly TAG_EXP_MONTH='ExpMonth'
readonly TAG_EXP_YEAR='ExpYear'
readonly TAG_CVV='CVV2'
readonly TAG_TRANSACTION_ID='TransactionID'
readonly TAG_ACK='Ack'
readonly TAG_CORRELATION_ID='CorrelationID'
readonly TAG_PROCESSED_AMOUNT='Amount'

# ERE fragments shared by the element matchers: an optional "prefix:"
# namespace qualifier and an optional attribute list inside the opening tag.
readonly NS_PREFIX_RE='([A-Za-z_][A-Za-z0-9_.-]*:)?'
readonly ATTRS_RE='([[:space:]][^>]*)?'

#######################################
# Print the usage text.
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout
#######################################
show_help() {
  cat << EOF
PayPal SOAP Log Parser

USAGE:
    $0 [OPTIONS] LOGFILE

OPTIONS:
    -h, --help      Show this help message
    -s, --summary   Show summary statistics only
    -r, --raw       Output raw structured data (default)

OUTPUT FORMAT (pipe-separated):
    TRANS_NUM|AMOUNT|CURRENCY|FIRSTNAME|LASTNAME|STREET|CITY|STATE|ZIP|CCTYPE|CCLAST4|EXPMONTH|EXPYEAR|CVV|TRANSID|STATUS|CORRID|PROC_AMOUNT

FIELD DESCRIPTIONS:
    TRANS_NUM    - Transaction sequence number
    AMOUNT       - Order total amount
    CURRENCY     - Currency code (USD, etc)
    FIRSTNAME    - Customer first name
    LASTNAME     - Customer last name
    STREET       - Street address
    CITY         - City name
    STATE        - State/Province code
    ZIP          - Postal code
    CCTYPE       - Credit card type (Visa, MasterCard, etc)
    CCLAST4      - Last 4 digits of credit card
    EXPMONTH     - Card expiration month
    EXPYEAR      - Card expiration year
    CVV          - CVV code
    TRANSID      - PayPal transaction ID
    STATUS       - Transaction status (Success/Failure)
    CORRID       - Correlation ID
    PROC_AMOUNT  - Actually processed amount

EXAMPLES:
    # Get all transactions
    $0 payments.log

    # Get only successful transactions
    $0 payments.log | grep Success

    # Count transactions by state
    $0 payments.log | cut -d'|' -f8 | sort | uniq -c | sort -nr

    # Find largest transaction
    $0 payments.log | sort -t'|' -k2 -nr | head -1

    # Get transactions over \$500
    $0 payments.log | awk -F'|' '\$2 > 500'

    # Summary stats
    $0 -s payments.log
EOF
}

#######################################
# Print the text content of the first <TAG> element found in an XML string.
# The opening tag may carry a namespace prefix and attributes.
# Arguments: $1 - element name (without prefix)
#            $2 - XML text to search
# Outputs:   the element text without a trailing newline; nothing when the
#            element is absent. Carriage returns are removed.
#######################################
xml_field() {
  local tag=$1 xml=$2
  local re="<${NS_PREFIX_RE}${tag}${ATTRS_RE}>([^<]*)"
  local value=""

  # Groups 1 and 2 are the prefix and attribute list; group 3 is the text.
  if [[ $xml =~ $re ]]; then
    value=${BASH_REMATCH[3]}
    value=${value//$'\r'/}
  fi
  printf '%s' "$value"
}

#######################################
# Print every value of a <TAG> element found anywhere in a file.
# Arguments: $1 - element name (without prefix)
#            $2 - file to scan
#            $3 - optional ERE the value must match (default: [^<]+)
# Outputs:   one value per line on stdout
#######################################
xml_values() {
  local tag=$1 file=$2
  local value_re='[^<]+'

  if (( $# >= 3 )); then
    value_re=$3
  fi
  grep -oE "<${NS_PREFIX_RE}${tag}${ATTRS_RE}>${value_re}" -- "$file" \
    | sed -E 's/^<[^>]*>//'
}

#######################################
# Join all arguments with '|' and print them as a single line.
#######################################
join_pipe() {
  local IFS='|'
  printf '%s\n' "$*"
}

#######################################
# Print summary statistics for a log file.
# Arguments: $1 - log file
# Outputs:   request/success counts, top 5 states, amount statistics
#######################################
print_summary() {
  local logfile=$1
  local total successful amounts total_amount largest smallest

  total=$(grep -c 'PPAPIService: Request:' -- "$logfile")
  # Only response lines are inspected, so a "Success" string elsewhere in the
  # log does not inflate the count.
  successful=$(grep -c 'PPAPIService: Response:.*Success' -- "$logfile")

  amounts=$(xml_values "$TAG_ORDER_TOTAL" "$logfile" '[0-9.]+')
  total_amount=$(printf '%s\n' "$amounts" \
    | LC_ALL=C awk '{sum += $1} END {printf "%.2f", sum}')
  largest=$(printf '%s\n' "$amounts" | LC_ALL=C sort -nr | head -n 1)
  smallest=$(printf '%s\n' "$amounts" | LC_ALL=C sort -n | head -n 1)

  printf '%s\n' '=== SUMMARY ==='
  printf 'Total Transactions: %s\n' "$total"
  printf 'Successful: %s\n' "$successful"
  printf 'Failed: %s\n' "$((total - successful))"
  printf '\n'
  printf '%s\n' 'Top 5 States by Transaction Count:'
  xml_values "$TAG_STATE" "$logfile" | sort | uniq -c | sort -nr | head -n 5
  printf '\n'
  printf '%s\n' 'Transaction Amount Stats:'
  printf ' Total: $%s\n' "$total_amount"
  printf ' Largest: $%s\n' "$largest"
  printf ' Smallest: $%s\n' "$smallest"
  return 0
}

#######################################
# Print one pipe-separated record per request found in a log file.
# Requests and responses are paired purely by order of appearance: the N-th
# non-empty request is matched with the N-th non-empty response. A request
# without a matching response gets four empty response fields.
# Arguments: $1 - log file
# Outputs:   records on stdout, in the column order shown by show_help
#######################################
print_transactions() {
  local logfile=$1
  local -a requests=() responses=() response_rows=()
  local xml currency card_number
  local currency_re='currencyID="([^"]*)"'
  local index

  mapfile -t requests < <(
    sed -nr 's/PPAPIService: Request: (.*)/\1/p' -- "$logfile"
  )
  # The XML declaration (<?xml ... ?>) in front of the response body is
  # dropped by the substitution.
  mapfile -t responses < <(
    sed -nr 's/PPAPIService: Response: <\?.*\?>(.*)/\1/p' -- "$logfile"
  )

  # Response table, 1-based so that it lines up with TRANS_NUM.
  index=1
  for xml in "${responses[@]}"; do
    [[ -n $xml ]] || continue
    response_rows[index]=$(join_pipe \
      "$(xml_field "$TAG_TRANSACTION_ID" "$xml")" \
      "$(xml_field "$TAG_ACK" "$xml")" \
      "$(xml_field "$TAG_CORRELATION_ID" "$xml")" \
      "$(xml_field "$TAG_PROCESSED_AMOUNT" "$xml")")
    index=$((index + 1))
  done

  index=1
  for xml in "${requests[@]}"; do
    [[ -n $xml ]] || continue

    currency=""
    if [[ $xml =~ $currency_re ]]; then
      currency=${BASH_REMATCH[1]}
    fi

    # CCLAST4 is documented as the last four digits, so never emit more.
    card_number=$(xml_field "$TAG_CARD_NUMBER" "$xml")
    if (( ${#card_number} > 4 )); then
      card_number=${card_number: -4}
    fi

    # The response part already contains its own three separators; "|||"
    # stands in for four empty fields when there is no response.
    join_pipe \
      "$index" \
      "$(xml_field "$TAG_ORDER_TOTAL" "$xml")" \
      "$currency" \
      "$(xml_field "$TAG_FIRST_NAME" "$xml")" \
      "$(xml_field "$TAG_LAST_NAME" "$xml")" \
      "$(xml_field "$TAG_STREET" "$xml")" \
      "$(xml_field "$TAG_CITY" "$xml")" \
      "$(xml_field "$TAG_STATE" "$xml")" \
      "$(xml_field "$TAG_ZIP" "$xml")" \
      "$(xml_field "$TAG_CARD_TYPE" "$xml")" \
      "$card_number" \
      "$(xml_field "$TAG_EXP_MONTH" "$xml")" \
      "$(xml_field "$TAG_EXP_YEAR" "$xml")" \
      "$(xml_field "$TAG_CVV" "$xml")" \
      "${response_rows[index]:-|||}"
    index=$((index + 1))
  done
  return 0
}

#######################################
# Entry point: parse options, validate the log file, dispatch.
# Option parsing stops at the first non-option argument, which is taken as
# the log file.
# Arguments: the script's command-line arguments
# Outputs:   records or summary on stdout; diagnostics on stderr
# Returns:   0 on success or after --help, 1 on a usage or file error
#######################################
main() {
  local summary_only=false
  # Initialised explicitly so a LOGFILE-like value is never inherited from
  # the environment.
  local logfile=""

  while (( $# > 0 )); do
    case "$1" in
      -h | --help)
        show_help
        return 0
        ;;
      -s | --summary)
        summary_only=true
        shift
        ;;
      -r | --raw)
        summary_only=false
        shift
        ;;
      -*)
        printf 'Unknown option %s\n' "$1" >&2
        show_help >&2
        return 1
        ;;
      *)
        logfile=$1
        break
        ;;
    esac
  done

  if [[ -z $logfile ]]; then
    printf '%s\n' 'Error: No logfile specified' >&2
    show_help >&2
    return 1
  fi

  if [[ ! -f $logfile ]]; then
    printf "Error: File '%s' not found\n" "$logfile" >&2
    return 1
  fi

  if [[ $summary_only == true ]]; then
    print_summary "$logfile"
  else
    print_transactions "$logfile"
  fi
}

# Must stay the last command: its return value is the script's exit status.
main "$@"