[
            'code' => 'tha',
            'path' => 'tha/l2/fgm/',
            'file_prefix' => 'tha_l2_fgm_',
            'variable' => 'tha_fgs_gsm'
        ],
        'B' => [
            'code' => 'thb',
            'path' => 'thb/l2/fgm/',
            'file_prefix' => 'thb_l2_fgm_',
            'variable' => 'thb_fgs_gsm'
        ],
        'C' => [
            'code' => 'thc',
            'path' => 'thc/l2/fgm/',
            'file_prefix' => 'thc_l2_fgm_',
            'variable' => 'thc_fgs_gsm'
        ],
        'D' => [
            'code' => 'thd',
            'path' => 'thd/l2/fgm/',
            'file_prefix' => 'thd_l2_fgm_',
            'variable' => 'thd_fgs_gsm'
        ],
        'E' => [
            'code' => 'the',
            'path' => 'the/l2/fgm/',
            'file_prefix' => 'the_l2_fgm_',
            'variable' => 'the_fgs_gsm'
        ]
    ];

    /**
     * Select the satellite whose data will be retrieved.
     *
     * The identifier is upper-cased and must be a key of the satellites map;
     * the matching configuration entry is stored for later use.
     *
     * @param string $satellite Satellite key (case-insensitive)
     * @throws InvalidArgumentException If the key is not in the satellites map
     */
    public function __construct($satellite)
    {
        $satellite = strtoupper($satellite);

        if (!isset($this->satellites[$satellite])) {
            throw new InvalidArgumentException("Invalid satellite. Must be one of: " . implode(', ', array_keys($this->satellites)));
        }

        $this->satellite = $satellite;
        $this->satelliteConfig = $this->satellites[$satellite];
    }

    /**
     * Get available CDF files for a date range
     *
     * Walks the range one day at a time (both ends inclusive). For each day
     * the listing of that year's directory is searched for files named
     * "<file_prefix><YYYYMMDD>_v<digits>.cdf", and the one with the highest
     * numeric version is kept. Days without a matching file contribute nothing.
     *
     * @param string $startDate Format: YYYY-MM-DD
     * @param string $endDate Format: YYYY-MM-DD
     * @return array Array of CDF file URLs
     */
    public function getCdfUrls($startDate, $endDate)
    {
        $urls = [];
        $day = new DateTime($startDate);
        $lastDay = new DateTime($endDate);
        $oneDay = new DateInterval('P1D');

        // Listings keyed by directory URL. The directory only depends on the
        // year, so each one is requested once instead of once per day.
        $listings = [];

        while ($day <= $lastDay) {
            $dirUrl = $this->baseUrl . $this->satelliteConfig['path'] . $day->format('Y') . '/';
            if (!isset($listings[$dirUrl])) {
                $listings[$dirUrl] = $this->getDirectoryListing($dirUrl);
            }

            // The version number in the file name is not known in advance,
            // so match on everything up to and including "_v".
            $filePattern = $this->satelliteConfig['file_prefix'] . $day->format('Ymd') . '_v';

            // Find the latest version for this date
            $latestFile = null;
            $latestVersion = -1;

            foreach ($listings[$dirUrl] as $file) {
                if (strpos($file, $filePattern) !== 0 || substr($file, -4) !== '.cdf') {
                    continue;
                }

                // Extract version number
                if (preg_match('/v(\d+)\.cdf$/', $file, $versionMatch)) {
                    $version = intval($versionMatch[1]);
                    if ($version > $latestVersion) {
                        $latestVersion = $version;
                        $latestFile = $file;
                    }
                }
            }

            if ($latestFile !== null) {
                $urls[] = $dirUrl . $latestFile;
            }

            $day->add($oneDay);
        }

        return $urls;
    }

    /**
     * Get directory listing from CDAWeb
     *
     * Issues a GET request with a 30 second timeout. Any response other than
     * HTTP 200 is logged and yields an empty array.
     *
     * @param string $url Directory URL
     * @return array Array of filenames
     */
    private function getDirectoryListing($url)
    {
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        curl_setopt($ch, CURLOPT_USERAGENT, 'PHP THEMIS Data Retriever');

        $response = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        if ($httpCode !== 200 || $response === false) {
            error_log("Failed to get directory listing from: $url");
            return [];
        }

        // Parse HTML directory listing to extract filenames
        //
        // NOTE(review): this file is truncated at this point. Everything
        // between "preg_match_all('/" and "createPythonScript();" is missing:
        // the listing regex, the rest of this method, downloadCdfFile() (which
        // retrieveDataRange() calls) and the opening of extractGsmData().
        // The fragment below is kept exactly as found and is NOT valid PHP;
        // restore the missing code from the original file before running.
        preg_match_all('/createPythonScript();
        $scriptPath = tempnam(sys_get_temp_dir(), 'cdf_reader') . '.py';
        file_put_contents($scriptPath, $pythonScript);

        $jsonOutput = tempnam(sys_get_temp_dir(), 'cdf_data') .
'.json';

        // Execute Python script with the GSM variable name.
        // Each argument is passed through escapeshellarg() so that paths
        // containing spaces, quotes or shell metacharacters cannot break or
        // alter the command line.
        $gsmVariable = $this->satelliteConfig['variable'];
        $command = 'python '
            . escapeshellarg($scriptPath) . ' '
            . escapeshellarg($cdfPath) . ' '
            . escapeshellarg($jsonOutput) . ' '
            . escapeshellarg($gsmVariable) . ' 2>&1';
        $output = shell_exec($command);

        // Clean up temporary script
        unlink($scriptPath);

        if (!file_exists($jsonOutput)) {
            error_log("Failed to extract data from CDF: $cdfPath");
            error_log("Python output: $output");
            return false;
        }

        $data = json_decode(file_get_contents($jsonOutput), true);
        unlink($jsonOutput);

        // json_decode() yields null for malformed JSON; report that as a
        // failure instead of handing a non-array back to the caller.
        if (!is_array($data)) {
            error_log("Failed to extract data from CDF: $cdfPath");
            error_log("Python output: $output");
            return false;
        }

        return $data;
    }

    /**
     * Create Python script for reading CDF files
     *
     * The script takes three arguments (CDF path, output JSON path, GSM
     * variable name) and writes a JSON object with the keys "metadata",
     * "time", "Bx_gsm", "By_gsm" and "Bz_gsm".
     *
     * The script lives in a single-quoted PHP string, where \" is NOT an
     * escape sequence. It therefore must not contain backslash-escaped quotes
     * (they reach Python verbatim and are a syntax error inside an f-string
     * expression) nor any single quote.
     *
     * @return string Python script content
     */
    private function createPythonScript()
    {
        return '
import sys
import json
import numpy as np
from datetime import datetime
from spacepy import pycdf

def read_cdf_data(cdf_path, output_path, gsm_variable):
    try:
        cdf = pycdf.CDF(cdf_path)

        # Print available variables for debugging
        vars_list = list(cdf.keys())
        print(f"Available variables in CDF: {vars_list}")

        # Extract time data - try multiple possible time variable names
        time_data = None
        possible_time_vars = [
            gsm_variable.replace("_fgs_gsm", "_fgs_time"),  # e.g., tha_fgs_time
            gsm_variable.replace("_fgs_gsm", "_fgs_epoch"),  # e.g., tha_fgs_epoch
            "Epoch",
            "time_unix",
            "time"
        ]

        for time_var in possible_time_vars:
            if time_var in vars_list:
                time_data = cdf[time_var][...]
                print(f"Using time variable: {time_var}")
                break

        # Extract GSM magnetic field data
        if gsm_variable not in vars_list:
            print(f"Error: Variable {gsm_variable} not found in CDF")
            print(f"Available variables: {vars_list}")
            cdf.close()
            return False

        gsm_data = cdf[gsm_variable][...]

        print(f"GSM variable: {gsm_variable}")
        print(f"GSM data shape: {gsm_data.shape}")
        print(f"Time data type: {type(time_data)}")
        if time_data is not None:
            print(f"Time data length: {len(time_data)}")

        # Convert to JSON-serializable format
        result = {
            "metadata": {
                "variable_name": gsm_variable,
                "data_points": len(gsm_data),
                "time_variable": next((tv for tv in possible_time_vars if tv in vars_list), "unknown"),
                "data_shape": list(gsm_data.shape)
            },
            "time": [],
            "Bx_gsm": [],
            "By_gsm": [],
            "Bz_gsm": []
        }

        # Process the data
        for i in range(len(gsm_data)):
            # Handle time data
            if time_data is not None and i < len(time_data):
                t = time_data[i]
                if hasattr(t, "isoformat"):
                    result["time"].append(t.isoformat())
                elif hasattr(t, "datetime"):
                    result["time"].append(t.datetime.isoformat())
                else:
                    # Handle Unix timestamp or other numeric formats
                    try:
                        # Assume Unix timestamp
                        dt = datetime.utcfromtimestamp(float(t))
                        result["time"].append(dt.isoformat())
                    except Exception:
                        result["time"].append(str(t))
            else:
                result["time"].append(f"point_{i}")

            # Extract GSM components (assuming 3-component vector)
            if len(gsm_data.shape) > 1 and gsm_data.shape[1] >= 3:
                # Handle NaN values
                bx = float(gsm_data[i, 0]) if not np.isnan(gsm_data[i, 0]) else None
                by = float(gsm_data[i, 1]) if not np.isnan(gsm_data[i, 1]) else None
                bz = float(gsm_data[i, 2]) if not np.isnan(gsm_data[i, 2]) else None

                result["Bx_gsm"].append(bx)
                result["By_gsm"].append(by)
                result["Bz_gsm"].append(bz)
            else:
                print(f"Warning: Unexpected data structure for GSM data")
                result["Bx_gsm"].append(None)
                result["By_gsm"].append(None)
                result["Bz_gsm"].append(None)

        cdf.close()

        # Save to JSON
        with open(output_path, "w") as f:
            json.dump(result, f, indent=2)

        # Computed outside the f-string: quotes cannot be escaped in there.
        point_count = len(result["Bx_gsm"])
        print(f"Successfully extracted {point_count} data points")
        return True

    except Exception as e:
        print(f"Error reading CDF: {e}")
        import traceback
        traceback.print_exc()
        return False

if __name__ == "__main__":
    if len(sys.argv) != 4:
        print("Usage: python script.py <cdf_path> <output_path> <gsm_variable>")
        sys.exit(1)

    success = read_cdf_data(sys.argv[1], sys.argv[2], sys.argv[3])
    sys.exit(0 if success else 1)
';
    }

    /**
     * Process date range and retrieve all data
     *
     * Resolves the CDF URLs for the range, downloads each file into a
     * temporary directory, extracts the GSM data from it and collects the
     * results, together with a metadata header, into one JSON file. Files
     * that fail to download or to parse are reported and skipped.
     *
     * @param string $startDate Format: YYYY-MM-DD
     * @param string $endDate Format: YYYY-MM-DD
     * @param string $outputFile Output JSON filename
     * @return bool Success status
     */
    public function retrieveDataRange($startDate, $endDate, $outputFile)
    {
        $cdfUrls = $this->getCdfUrls($startDate, $endDate);

        if (empty($cdfUrls)) {
            echo "No CDF files found for the specified date range.\n";
            return false;
        }

        echo "Found " . count($cdfUrls) . " CDF files to process.\n";

        $allData = [
            "metadata" => [
                "mission" => "THEMIS-" . $this->satellite,
                "satellite_code" => $this->satelliteConfig['code'],
                "data_type" => "FGM GSM Magnetic Field",
                "start_date" => $startDate,
                "end_date" => $endDate,
                "retrieval_time" => date('c'),
                "files_processed" => count($cdfUrls),
                "variable_name" => $this->satelliteConfig['variable']
            ],
            "files" => []
        ];

        $tempDir = sys_get_temp_dir() . '/themis_cdf/';
        // The second is_dir() covers the case where the directory appeared
        // between the first check and the mkdir() call.
        if (!is_dir($tempDir) && !mkdir($tempDir, 0755, true) && !is_dir($tempDir)) {
            error_log("Failed to create temp directory: $tempDir");
            return false;
        }

        foreach ($cdfUrls as $url) {
            $filename = basename($url);
            $localPath = $tempDir . $filename;

            echo "Processing: $filename\n";

            // Download CDF file
            if (!$this->downloadCdfFile($url, $localPath)) {
                echo " Failed to download, skipping...\n";
                // Remove whatever a failed download may have left behind.
                if (is_file($localPath)) {
                    unlink($localPath);
                }
                continue;
            }

            // Extract GSM data
            $gsmData = $this->extractGsmData($localPath);

            if (is_array($gsmData)) {
                $allData["files"][] = [
                    "filename" => $filename,
                    "url" => $url,
                    "data" => $gsmData
                ];
                echo " Successfully processed " . count($gsmData["Bx_gsm"] ?? []) . " data points\n";
            } else {
                echo " Failed to extract data, skipping...\n";
            }

            // Clean up downloaded file
            if (is_file($localPath)) {
                unlink($localPath);
            }
        }

        // Clean up temp directory, but only when nothing is left in it:
        // rmdir() fails with a warning on a non-empty directory.
        if (is_dir($tempDir)) {
            $entries = scandir($tempDir);
            if ($entries !== false && count(array_diff($entries, ['.', '..'])) === 0) {
                rmdir($tempDir);
            }
        }

        // Save to JSON
        return $this->saveToJson($allData, $outputFile);
    }

    /**
     * Save data to JSON file
     *
     * Encodes the data as pretty-printed JSON with unescaped slashes, writes
     * it to the given file and prints the resulting size. Encoding and write
     * failures are logged.
     *
     * @param mixed $data Data to encode
     * @param string $filename Destination path
     * @return bool True on success, false if encoding or writing failed
     */
    private function saveToJson($data, $filename)
    {
        $jsonData = json_encode($data, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);

        if ($jsonData === false) {
            error_log("Failed to encode data to JSON: " . json_last_error_msg());
            return false;
        }

        $bytesWritten = file_put_contents($filename, $jsonData);

        if ($bytesWritten === false) {
            error_log("Failed to write to file: $filename");
            return false;
        }

        echo "Data successfully saved to $filename\n";
        // file_put_contents() returns the number of bytes written, which is
        // the size of the file just created.
        echo "File size: " . formatBytes($bytesWritten) . "\n";

        return true;
    }

    /**
     * Get satellite information
     *
     * @return array Keys: 'satellite', 'code', 'variable', 'path'
     */
    public function getSatelliteInfo()
    {
        return [
            'satellite' => $this->satellite,
            'code' => $this->satelliteConfig['code'],
            'variable' => $this->satelliteConfig['variable'],
            'path' => $this->satelliteConfig['path']
        ];
    }

    /**
     * List all available satellites
     *
     * The satellites map is an instance property, so a throwaway instance is
     * created to read its keys. This relies on 'A' being a valid key.
     *
     * @return array Keys of the satellites map
     */
    public static function getAvailableSatellites()
    {
        return array_keys((new self('A'))->satellites);
    }
}

/**
 * Format bytes for human-readable output
 *
 * Divides by 1024 until the value drops below 1024 or the largest unit (TB)
 * is reached, so the unit index can never run past the end of the list.
 *
 * @param int|float $bytes Size in bytes
 * @param int $precision Number of decimal places to round to
 * @return string For example "1.5 KB"
 */
function formatBytes($bytes, $precision = 2)
{
    $units = ['B', 'KB', 'MB', 'GB', 'TB'];
    $lastUnit = count($units) - 1;

    for ($i = 0; $bytes >= 1024 && $i < $lastUnit; $i++) {
        $bytes /= 1024;
    }

    return round($bytes, $precision) . ' ' . $units[$i];
}

/**
 * Command-line entry point.
 *
 * Arguments: satellite, start date, end date and an optional output file
 * name. Prints usage and exits with status 1 on invalid input or when the
 * retrieval fails.
 */
function main()
{
    global $argc, $argv;

    if ($argc < 4) {
        echo "Usage: php themis_retriever.php <satellite> <start_date> <end_date> [output_file]\n";
        echo "Satellite: " . implode(', ', ThemisDataRetriever::getAvailableSatellites()) . "\n";
        echo "Date format: YYYY-MM-DD\n";
        echo "Examples:\n";
        echo " php themis_retriever.php A 2023-01-01 2023-01-03\n";
        echo " php themis_retriever.php E 2023-01-01 2023-01-01 my_output.json\n";
        exit(1);
    }

    $satellite = strtoupper($argv[1]);
    $startDate = $argv[2];
    $endDate = $argv[3];
    $outputFile = $argv[4] ?? "themis_{$satellite}_gsm_data_" . date('Y-m-d_H-i-s') . '.json';

    // Validate satellite
    $availableSatellites = ThemisDataRetriever::getAvailableSatellites();
    if (!in_array($satellite, $availableSatellites, true)) {
        echo "Error: Invalid satellite '$satellite'. Must be one of: " . implode(', ', $availableSatellites) . "\n";
        exit(1);
    }

    // Validate date format. createFromFormat() silently rolls impossible
    // dates over (2023-02-30 becomes 2023-03-02), so the parsed value is
    // formatted back and compared with the input.
    $isValidDate = static function ($value) {
        $parsed = DateTime::createFromFormat('!Y-m-d', $value);
        return $parsed !== false && $parsed->format('Y-m-d') === $value;
    };

    if (!$isValidDate($startDate) || !$isValidDate($endDate)) {
        echo "Error: Invalid date format. Use YYYY-MM-DD\n";
        exit(1);
    }

    // Both values are zero-padded YYYY-MM-DD here, so string order is date order.
    if (strcmp($startDate, $endDate) > 0) {
        echo "Error: Start date must not be after end date\n";
        exit(1);
    }

    try {
        $retriever = new ThemisDataRetriever($satellite);
        $satInfo = $retriever->getSatelliteInfo();

        echo "=== THEMIS GSM Magnetic Field Data Retriever ===\n";
        echo "Satellite: THEMIS-{$satInfo['satellite']} ({$satInfo['code']})\n";
        echo "Date range: $startDate to $endDate\n";
        echo "Output file: $outputFile\n";
        echo "Target variable: {$satInfo['variable']}\n\n";

        if ($retriever->retrieveDataRange($startDate, $endDate, $outputFile)) {
            echo "\n=== Data retrieval completed successfully! ===\n";
        } else {
            echo "\n=== Data retrieval failed ===\n";
            exit(1);
        }
    } catch (Exception $e) {
        echo "Error: " . $e->getMessage() . "\n";
        exit(1);
    }
}

// Run the script
if (php_sapi_name() === 'cli') {
    main();
}
?>