"""Tests for ``sp2xr.helpers.extract_sp2xr_filename_parts``.

The module contains three groups of tests:

* ``TestExtractSP2XRFilenameParts`` - unit tests on synthetic paths.
* ``TestExtractSP2XRFilenamePartsIntegration`` - smoke tests on whatever
  sample files exist under ``tests/data`` (skipped when none are present).
* ``TestRealFilenamePatterns`` - a purely informational report of the
  filename shapes found in the sample data.
"""

import sys
from pathlib import Path

import pytest

# Add src to path to import sp2xr modules
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

from sp2xr.helpers import extract_sp2xr_filename_parts  # noqa: E402


def _find_files_containing(directory, token):
    """Return the regular files under *directory* whose name contains *token*.

    Matching is case-insensitive and the result is sorted, so the selection
    is the same on every platform. Combining two case-variant glob patterns
    instead can yield duplicates where glob matching is case-insensitive,
    and can also return directories.
    """
    needle = token.lower()
    return sorted(
        path
        for path in directory.rglob("*")
        if path.is_file() and needle in path.name.lower()
    )


class TestExtractSP2XRFilenameParts:
    """Test cases for extract_sp2xr_filename_parts function."""

    def test_standard_windows_path(self):
        """Test with typical Windows path format."""
        file_path = r"C:\data\SP2XR\20240101\file_20240101_001.csv"
        file_name_cut, folder_name = extract_sp2xr_filename_parts(file_path)

        assert file_name_cut == "20240101_001"
        assert folder_name == "20240101"

    def test_standard_unix_path(self):
        """Test with typical Unix path format."""
        file_path = "/data/SP2XR/20240101/file_20240101_001.csv"
        file_name_cut, folder_name = extract_sp2xr_filename_parts(file_path)

        assert file_name_cut == "20240101_001"
        assert folder_name == "20240101"

    def test_zip_file_extension(self):
        """Test with .zip extension."""
        file_path = "/data/SP2XR/20240101/pbp_20240101_002.zip"
        file_name_cut, folder_name = extract_sp2xr_filename_parts(file_path)

        assert file_name_cut == "20240101_002"
        assert folder_name == "20240101"

    def test_multiple_underscores(self):
        """Test filename with multiple underscores."""
        file_path = "/data/SP2XR/20240101/sp2_data_file_20240101_003.csv"
        file_name_cut, folder_name = extract_sp2xr_filename_parts(file_path)

        assert file_name_cut == "20240101_003"
        assert folder_name == "20240101"

    def test_pathlib_object_input(self):
        """Test with Path object as input."""
        file_path = Path("/data/SP2XR/20240101/hk_20240101_004.csv")
        file_name_cut, folder_name = extract_sp2xr_filename_parts(file_path)

        assert file_name_cut == "20240101_004"
        assert folder_name == "20240101"

    def test_mixed_path_separators(self):
        """Test handling of mixed path separators."""
        file_path = r"C:\data\SP2XR/20240101\file_20240101_005.csv"
        file_name_cut, folder_name = extract_sp2xr_filename_parts(file_path)

        assert file_name_cut == "20240101_005"
        assert folder_name == "20240101"

    def test_single_underscore_filename(self):
        """Test filename with only one underscore."""
        file_path = "/data/SP2XR/20240101/data_001.csv"
        file_name_cut, folder_name = extract_sp2xr_filename_parts(file_path)

        # With a single underscore the whole stem is expected back, and the
        # part before the underscore is reported as the folder name (not the
        # parent directory).
        assert file_name_cut == "data_001"
        assert folder_name == "data"

    def test_no_underscore_filename(self):
        """Test filename with no underscores - edge case."""
        file_path = "/data/SP2XR/20240101/datafile.csv"
        file_name_cut, folder_name = extract_sp2xr_filename_parts(file_path)

        # Should fallback to stem and parent name
        assert file_name_cut == "datafile"
        assert folder_name == "20240101"

    def test_empty_filename_parts(self):
        """Test edge case with unusual filename structure."""
        file_path = "/data/SP2XR/folder/_.csv"
        file_name_cut, folder_name = extract_sp2xr_filename_parts(file_path)

        # "_.csv" is a pathological name: both underscore-separated parts of
        # the stem are empty. The expected interpretation is:
        #   file_name_cut = "_"       (the stem of the file)
        #   folder_name   = "folder"  (falls back to the parent directory name)
        assert file_name_cut == "_"
        assert folder_name == "folder"

    @pytest.mark.parametrize("file_extension", [".csv", ".zip", ".parquet", ".txt"])
    def test_different_extensions(self, file_extension):
        """Test with different file extensions."""
        file_path = f"/data/SP2XR/20240101/test_20240101_001{file_extension}"
        file_name_cut, folder_name = extract_sp2xr_filename_parts(file_path)

        assert file_name_cut == "20240101_001"
        assert folder_name == "20240101"

    # Parametrized so every pattern is run and reported on its own instead of
    # the whole test stopping at the first failing case.
    @pytest.mark.parametrize(
        ("file_path", "expected_cut", "expected_folder"),
        [
            ("/data/SP2XR/20240101/PbP_20240101_001.csv", "20240101_001", "20240101"),
            ("/data/SP2XR/20240101/hk_20240101_002.zip", "20240101_002", "20240101"),
            (r"C:\SP2XR\20240101\SP2_20240101_003.csv", "20240101_003", "20240101"),
        ],
    )
    def test_real_sp2xr_filename_patterns(
        self, file_path, expected_cut, expected_folder
    ):
        """Test with realistic SP2XR filename patterns."""
        file_name_cut, folder_name = extract_sp2xr_filename_parts(file_path)

        assert file_name_cut == expected_cut, f"Failed for {file_path}"
        assert folder_name == expected_folder, f"Failed for {file_path}"


class TestExtractSP2XRFilenamePartsIntegration:
    """Integration tests using existing test files in the repository."""

    @pytest.fixture
    def test_data_dir(self):
        """Get the path to the test data directory."""
        # The data directory sits next to this test file.
        return Path(__file__).parent / "data"

    def test_with_real_pbp_files(self, test_data_dir):
        """Test function with actual PbP test files."""
        pbp_dir = test_data_dir / "pbp_files_test"

        if not pbp_dir.exists():
            pytest.skip(f"Test data directory not found: {pbp_dir}")

        # Find actual PbP files in the test directory
        pbp_files = _find_files_containing(pbp_dir, "pbp")

        if not pbp_files:
            pytest.skip("No PbP test files found")

        for pbp_file in pbp_files[:3]:  # Test first 3 files to avoid long test times
            file_name_cut, folder_name = extract_sp2xr_filename_parts(pbp_file)

            # Verify the function doesn't crash and returns reasonable values
            assert isinstance(file_name_cut, str)
            assert isinstance(folder_name, str)
            assert len(file_name_cut) > 0
            assert len(folder_name) > 0

            print(
                f"PbP File: {pbp_file.name} -> Cut: {file_name_cut}, Folder: {folder_name}"
            )

    def test_with_real_hk_files(self, test_data_dir):
        """Test function with actual HK test files."""
        hk_dir = test_data_dir / "hk_files_test"

        if not hk_dir.exists():
            pytest.skip(f"Test data directory not found: {hk_dir}")

        # Find actual HK files in the test directory
        hk_files = _find_files_containing(hk_dir, "hk")

        if not hk_files:
            pytest.skip("No HK test files found")

        for hk_file in hk_files[:3]:  # Test first 3 files
            file_name_cut, folder_name = extract_sp2xr_filename_parts(hk_file)

            # Verify the function doesn't crash and returns reasonable values
            assert isinstance(file_name_cut, str)
            assert isinstance(folder_name, str)
            assert len(file_name_cut) > 0
            assert len(folder_name) > 0

            print(
                f"HK File: {hk_file.name} -> Cut: {file_name_cut}, Folder: {folder_name}"
            )

    def test_with_all_test_files(self, test_data_dir):
        """Test function with any CSV/ZIP files in test data.

        This is a best-effort smoke test: a failure on an individual file is
        reported but does not fail the test. The test only fails when no
        file at all could be processed.
        """
        if not test_data_dir.exists():
            pytest.skip(f"Test data directory not found: {test_data_dir}")

        # Find all CSV and ZIP files in test data (sorted for a stable report)
        test_files = sorted(
            path
            for pattern in ("**/*.csv", "**/*.zip")
            for path in test_data_dir.glob(pattern)
            if path.is_file()
        )

        if not test_files:
            pytest.skip("No test files found")

        successful_extractions = 0
        failures = []
        for test_file in test_files:
            # Keep the try body to the call under test so the handler only
            # deals with errors raised by the function itself.
            try:
                file_name_cut, folder_name = extract_sp2xr_filename_parts(test_file)
            except Exception as exc:  # deliberate: report and carry on
                failures.append(f"{test_file.name}: {exc}")
                print(f"✗ Failed on {test_file.name}: {exc}")
                continue

            # Basic validation
            if not (isinstance(file_name_cut, str) and isinstance(folder_name, str)):
                reason = (
                    "expected (str, str), got "
                    f"({type(file_name_cut).__name__}, {type(folder_name).__name__})"
                )
                failures.append(f"{test_file.name}: {reason}")
                print(f"✗ Failed on {test_file.name}: {reason}")
                continue

            successful_extractions += 1
            print(f"✓ {test_file.name} -> {file_name_cut} | {folder_name}")

        # Ensure we processed at least some files successfully
        assert (
            successful_extractions > 0
        ), f"No files were processed successfully: {failures}"
        print(
            f"\nSuccessfully processed {successful_extractions}/{len(test_files)} files"
        )


class TestRealFilenamePatterns:
    """Analyze actual filename patterns in test data."""

    @pytest.fixture
    def test_data_dir(self):
        """Get the path to the test data directory."""
        return Path(__file__).parent / "data"

    def test_analyze_filename_patterns(self, test_data_dir):
        """Analyze and report on actual filename patterns in test data.

        Purely informational: it prints a summary (visible with ``-s``) and
        makes no assertions.
        """
        if not test_data_dir.exists():
            pytest.skip(f"Test data directory not found: {test_data_dir}")

        # Maps a pattern description to the filenames that follow it.
        patterns = {}

        for file_path in sorted(test_data_dir.glob("**/*.*")):
            if file_path.suffix not in (".csv", ".zip"):
                continue
            filename = file_path.name
            parts = filename.split("_")
            # Describe the name by its part count and first three parts.
            pattern = f"{len(parts)} parts: {' | '.join(parts[:3])}{'...' if len(parts) > 3 else ''}"
            patterns.setdefault(pattern, []).append(filename)

        print("\nFilename patterns found:")
        for pattern, files in patterns.items():
            print(f"\n{pattern}")
            for file in files[:3]:  # Show first 3 examples
                print(f" - {file}")
            if len(files) > 3:
                print(f" ... and {len(files) - 3} more")


if __name__ == "__main__":
    # Allow running the test file directly; exit with pytest's status so
    # failures are visible to the calling shell or CI step.
    raise SystemExit(pytest.main([__file__, "-v", "-s"]))