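# ploughshares/docker/crawler/write_to_api.py
# (repository listing metadata: 65 lines, 2.1 KiB, Python)
#
# Reads crawler results from a JSON file and posts each transaction record
# to the transaction API.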

import json
import requests
import sys
# Endpoint that receives each transaction via HTTP POST.
API_BASE_URL = "http://ploughshares.nixc.us/api/transaction"

# Headers sent with every request; the payload is JSON.
HEADERS = {"Content-Type": "application/json"}

# Keys of a crawler record that are forwarded to the API. Any other key is
# discarded by clean_for_api before the payload is sent.
allowed_fields = {
    # What the transaction is and who is involved.
    "transaction_type",
    "company_division",
    "recipient",
    "amount",
    "description",
    # Postal address.
    "address_1",
    "address_2",
    "city",
    "province",
    "region",
    "postal_code",
    # Provenance of the record.
    "source_date",
    "source_description",
    # Classification and bookkeeping.
    "grant_type",
    "commodity_class",
    "contract_number",
    "comments",
    "is_primary",
}
def clean_for_api(tx):
    """Build an API payload from a raw crawler transaction record.

    Only keys listed in ``allowed_fields`` are kept. The input mapping is
    not modified; a new dict is returned.

    Args:
        tx: Mapping holding one transaction as produced by the crawler.

    Returns:
        A dict containing the allowed keys of ``tx`` with these adjustments:

        * ``source_date`` is removed unless it is a string other than
          "not found" (compared case-insensitively).
        * ``amount`` is removed when it cannot be read as a finite number.
          Strings such as ``"$1,234.50"`` are replaced by the parsed float
          so the API receives a numeric value; integers are kept as-is.
        * ``source_description`` is overwritten with ``tx["source_url"]``
          whenever ``tx`` has a ``source_url`` key.
    """
    import math  # local import: only needed for the finiteness check below

    cleaned = {key: value for key, value in tx.items() if key in allowed_fields}

    # Remove invalid source_date
    if "source_date" in cleaned:
        source_date = cleaned["source_date"]
        if not isinstance(source_date, str) or source_date.lower() == "not found":
            del cleaned["source_date"]

    # Normalise amount (API expects numeric)
    if "amount" in cleaned:
        raw_amount = cleaned["amount"]
        amount = None
        if isinstance(raw_amount, bool):
            # str(True) is not parseable as a number, so booleans are dropped.
            pass
        elif isinstance(raw_amount, int):
            # Integers are always finite; keep them exact instead of
            # round-tripping through float.
            amount = raw_amount
        else:
            try:
                parsed = float(str(raw_amount).replace(",", "").replace("$", ""))
            except ValueError:
                # e.g. "Not Found" or any other non-numeric text
                parsed = None
            # "nan" / "inf" parse successfully but are not valid JSON numbers.
            if parsed is not None and math.isfinite(parsed):
                amount = parsed

        if amount is None:
            del cleaned["amount"]
        else:
            cleaned["amount"] = amount

    # Use source_url for source_description
    if "source_url" in tx:
        cleaned["source_description"] = tx["source_url"]

    return cleaned
def post_transaction(transaction, timeout=30):
    """POST one transaction to the API and print the outcome.

    The record is first reduced to an API payload with ``clean_for_api``.
    A 200 or 201 response is reported as success, anything else as failure.

    Args:
        transaction: Mapping holding one raw transaction record.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection error, timeout, ...).
    """
    payload = clean_for_api(transaction)
    response = requests.post(
        API_BASE_URL, headers=HEADERS, json=payload, timeout=timeout
    )

    if response.status_code not in (200, 201):
        print(f"❌ Failed to create transaction: {response.status_code} - {response.text}")
        return

    # The transaction has been created at this point, so nothing below may
    # raise: an exception here would be reported by the caller as a failure.
    try:
        body = response.json()
    except ValueError:
        # Success status but the body is not JSON.
        body = None
    transaction_id = body.get("transaction_id") if isinstance(body, dict) else None
    company = payload.get("company_division", "<no company_division>")
    print(f"✅ Created transaction for {company} → ID: {transaction_id}")
def main(json_file_path):
    """Post every transaction stored in a JSON file.

    Args:
        json_file_path: Path to a UTF-8 JSON file containing either a list
            of transaction records or a single record.

    A failure while posting one record is printed and does not stop the
    remaining records from being processed.
    """
    with open(json_file_path, "r", encoding="utf-8") as handle:
        loaded = json.load(handle)

    # A lone record is handled as a one-element batch.
    records = loaded if isinstance(loaded, list) else [loaded]

    for record in records:
        try:
            post_transaction(record)
        except Exception as err:
            # Best effort: report the problem and move on to the next record.
            print(f"Error posting transaction: {err}")
if __name__ == "__main__":
    # Exactly one command-line argument is expected: the results file.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        main(cli_args[0])
    else:
        print("Usage: python write_to_api.py results.json")
        sys.exit(1)