用Obspy读取segy的文件头并保存到csv数据库

易研

2023-12-01

用Obspy读取segy的文件头并保存到csv数据库

读入Segy文件

from obspy.io.segy.segy import _read_segy

# Path of the SEG-Y file to read ('path.sgy' looks like a placeholder; point it at a real file).
filename = 'path.sgy'
# Parse the file; `segy` is the object whose traces and headers are inspected below.
segy = _read_segy(filename)

这样就得到了一个 SEGYFile 对象（即读入后的Segy文件）。

本文旨在读取segy的道头（trace header），因此我们先取其中一个trace的header，检查一下它有哪些可以调用的方法和可以访问的属性。

检查头文件

参考官方给出的SEGYTraceHeader这个对象所拥有的属性

obspy.io.segy.segy.SEGYTraceHeader — ObsPy 1.3.1 documentation

同理，需要拿到SEGYFile的header也用同样的方法。

obspy.io.segy.segy.SEGYFile — ObsPy 1.3.1 documentation

用 __dir__() 可以得到该对象所有属性名和方法名组成的list

# List the names of every attribute and method available on the first trace's header.
segy.traces[0].header.__dir__()
>>
['endian',
 'unpacked_header',
 'number_of_samples_in_this_trace',
 'original_field_record_number',
 'energy_source_point_number',
 'trace_sequence_number_within_line',
 'trace_sequence_number_within_segy_file',
 'trace_number_within_the_original_field_record',
 'ensemble_number',
 'trace_number_within_the_ensemble',
 'trace_identification_code',
 'number_of_vertically_summed_traces_yielding_this_trace',
 'number_of_horizontally_stacked_traces_yielding_this_trace',
 'data_use',
 'distance_from_center_of_the_source_point_to_the_center_of_the_receiver_group',
 'receiver_group_elevation',
 'surface_elevation_at_source',
 'source_depth_below_surface',
 'datum_elevation_at_receiver_group',
 'datum_elevation_at_source',
 'water_depth_at_source',
 'water_depth_at_group',
 'scalar_to_be_applied_to_all_elevations_and_depths',
 'scalar_to_be_applied_to_all_coordinates',
 'source_coordinate_x',
 'source_coordinate_y',
 'group_coordinate_x',
 'group_coordinate_y',
 'coordinate_units',
 'weathering_velocity',
 'subweathering_velocity',
 'uphole_time_at_source_in_ms',
 'uphole_time_at_group_in_ms',
 'source_static_correction_in_ms',
 'group_static_correction_in_ms',
 'total_static_applied_in_ms',
 'lag_time_A',
 'lag_time_B',
 'delay_recording_time',
 'mute_time_start_time_in_ms',
 'mute_time_end_time_in_ms',
 'sample_interval_in_ms_for_this_trace',
 'gain_type_of_field_instruments',
 'instrument_gain_constant',
 'instrument_early_or_initial_gain',
 'correlated',
 'sweep_frequency_at_start',
 'sweep_frequency_at_end',
 'sweep_length_in_ms',
 'sweep_type',
 'sweep_trace_taper_length_at_start_in_ms',
 'sweep_trace_taper_length_at_end_in_ms',
 'taper_type',
 'alias_filter_frequency',
 'alias_filter_slope',
 'notch_filter_frequency',
 'notch_filter_slope',
 'low_cut_frequency',
 'high_cut_frequency',
 'low_cut_slope',
 'high_cut_slope',
 'year_data_recorded',
 'day_of_year',
 'hour_of_day',
 'minute_of_hour',
 'second_of_minute',
 'time_basis_code',
 'trace_weighting_factor',
 'geophone_group_number_of_roll_switch_position_one',
 'geophone_group_number_of_trace_number_one',
 'geophone_group_number_of_last_trace',
 'gap_size',
 'over_travel_associated_with_taper',
 'x_coordinate_of_ensemble_position_of_this_trace',
 'y_coordinate_of_ensemble_position_of_this_trace',
 'for_3d_poststack_data_this_field_is_for_in_line_number',
 'for_3d_poststack_data_this_field_is_for_cross_line_number',
 'shotpoint_number',
 'scalar_to_be_applied_to_the_shotpoint_number',
 'trace_value_measurement_unit',
 'transduction_constant_mantissa',
 'transduction_constant_exponent',
 'transduction_units',
 'device_trace_identifier',
 'scalar_to_be_applied_to_times',
 'source_type_orientation',
 'source_energy_direction_mantissa',
 'source_energy_direction_exponent',
 'source_measurement_mantissa',
 'source_measurement_exponent',
 'source_measurement_unit',
 '__module__',
 '__doc__',
 '__init__',
 '_read_trace_header',
 'write',
 '__getattr__',
 '__str__',
 '_repr_pretty_',
 '_create_empty_trace_header',
 '__dict__',
 '__weakref__',
 '__repr__',
 '__hash__',
 '__getattribute__',
 '__setattr__',
 '__delattr__',
 '__lt__',
 '__le__',
 '__eq__',
 '__ne__',
 '__gt__',
 '__ge__',
 '__new__',
 '__reduce_ex__',
 '__reduce__',
 '__subclasshook__',
 '__init_subclass__',
 '__format__',
 '__sizeof__',
 '__dir__',
 '__class__']

如果只想得到头信息字段（不含方法），可以使用 __dict__ ，它的key就是后面保存csv时用到的所有列名

# Show the keys of the first trace header's instance dictionary (the fields later saved to CSV).
segy.traces[0].header.__dict__.keys()
>>
dict_keys(['endian', 'unpacked_header', 'number_of_samples_in_this_trace', 'original_field_record_number', 'energy_source_point_number', 'trace_sequence_number_within_line', 'trace_sequence_number_within_segy_file', 'trace_number_within_the_original_field_record', 'ensemble_number', 'trace_number_within_the_ensemble', 'trace_identification_code', 'number_of_vertically_summed_traces_yielding_this_trace', 'number_of_horizontally_stacked_traces_yielding_this_trace', 'data_use', 'distance_from_center_of_the_source_point_to_the_center_of_the_receiver_group', 'receiver_group_elevation', 'surface_elevation_at_source', 'source_depth_below_surface', 'datum_elevation_at_receiver_group', 'datum_elevation_at_source', 'water_depth_at_source', 'water_depth_at_group', 'scalar_to_be_applied_to_all_elevations_and_depths', 'scalar_to_be_applied_to_all_coordinates', 'source_coordinate_x', 'source_coordinate_y', 'group_coordinate_x', 'group_coordinate_y', 'coordinate_units', 'weathering_velocity', 'subweathering_velocity', 'uphole_time_at_source_in_ms', 'uphole_time_at_group_in_ms', 'source_static_correction_in_ms', 'group_static_correction_in_ms', 'total_static_applied_in_ms', 'lag_time_A', 'lag_time_B', 'delay_recording_time', 'mute_time_start_time_in_ms', 'mute_time_end_time_in_ms', 'sample_interval_in_ms_for_this_trace', 'gain_type_of_field_instruments', 'instrument_gain_constant', 'instrument_early_or_initial_gain', 'correlated', 'sweep_frequency_at_start', 'sweep_frequency_at_end', 'sweep_length_in_ms', 'sweep_type', 'sweep_trace_taper_length_at_start_in_ms', 'sweep_trace_taper_length_at_end_in_ms', 'taper_type', 'alias_filter_frequency', 'alias_filter_slope', 'notch_filter_frequency', 'notch_filter_slope', 'low_cut_frequency', 'high_cut_frequency', 'low_cut_slope', 'high_cut_slope', 'year_data_recorded', 'day_of_year', 'hour_of_day', 'minute_of_hour', 'second_of_minute', 'time_basis_code', 'trace_weighting_factor', 'geophone_group_number_of_roll_switch_position_one', 
'geophone_group_number_of_trace_number_one', 'geophone_group_number_of_last_trace', 'gap_size', 'over_travel_associated_with_taper', 'x_coordinate_of_ensemble_position_of_this_trace', 'y_coordinate_of_ensemble_position_of_this_trace', 'for_3d_poststack_data_this_field_is_for_in_line_number', 'for_3d_poststack_data_this_field_is_for_cross_line_number', 'shotpoint_number', 'scalar_to_be_applied_to_the_shotpoint_number', 'trace_value_measurement_unit', 'transduction_constant_mantissa', 'transduction_constant_exponent', 'transduction_units', 'device_trace_identifier', 'scalar_to_be_applied_to_times', 'source_type_orientation', 'source_energy_direction_mantissa', 'source_energy_direction_exponent', 'source_measurement_mantissa', 'source_measurement_exponent', 'source_measurement_unit'])

保存道头

# Instance dictionary of the first trace's header (field name -> value).
onetraceheader = vars(segy.traces[0].header)

# Key view over that same dictionary; these are the field names to save.
allkeys = onetraceheader.keys()

保存道头到csv

import pandas as pd
from tqdm import tqdm
from obspy.io.segy.segy import _read_segy

# Read the SEG-Y file into memory.
filename = 'path.sgy'
segy = _read_segy(filename)

## parameters
NameOut = 'NameOut'  # prefix shared by both output files
NTr = 3*50*637  # maximum number of traces whose headers are exported

## save header
# Dump every attribute of the file object as one "key:value" line.
# The `with` block closes the file even if a write fails, and the handle gets
# its own name so `filename` keeps referring to the input path.
sshear = segy.__dict__
with open(f'{NameOut}_header.txt', 'w', encoding='utf-8') as header_file:
    for k, v in sshear.items():
        header_file.write(k+':'+str(v))
        header_file.write('\n')

## save trace header
# Clamp to the traces actually present so a short file does not raise IndexError.
n_export = min(NTr, len(segy.traces))
alist = []
for ii in tqdm(range(n_export)):
    header = segy.traces[ii].header
    # Render the header to text before reading its __dict__. The original note
    # says that without this step the trace header sometimes cannot be read
    # (presumably the fields are only populated on access; confirm in ObsPy).
    _ = str(header)
    alist.append(header.__dict__)  # collect the dicts; convert to CSV once at the end

newpd = pd.DataFrame(alist)
# Remove the 'unpacked_header' column before writing; errors='ignore' keeps an
# empty export (no traces) from raising KeyError.
newpd = newpd.drop(columns=['unpacked_header'], errors='ignore')
newpd.to_csv(f"{NameOut}_trace_header.csv")

用Obspy读取segy的文件头并保存到csv数据库

用Obspy读取segy的文件头并保存到csv数据库

读入Segy文件

检查头文件

保存道头

保存道头到csv

相关阅读

相关文章

相关问答

相关文档