"""Extract COSMIC coding-mutation records for a list of hotspot targets.

Usage:
    python <this script> TARGETS.tsv

TARGETS.tsv holds one ``gene<TAB>protein`` pair per line.  Every record of
the COSMIC VCF whose ``GENE`` INFO field contains the gene string and whose
``AA`` INFO field contains the protein string is copied to
``hotspot_snv.vcf``.  Targets that never matched any record are printed at
the end.
"""
import sys

COSMIC_VCF = '/dataseq/jmdna/database/cosmic/v91_GRCh37/CosmicCodingMuts.vcf'
OUTPUT_VCF = 'hotspot_snv.vcf'

# Contig names declared in the output header: autosomes 1-22, then X, Y, MT.
CONTIG_NAMES = tuple(str(number) for number in range(1, 23)) + ('X', 'Y', 'MT')


def load_targets(path):
    """Read the tab-separated target file into a set of (gene, protein).

    Blank lines are skipped.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            tab-separated fields.
    """
    targets = set()
    with open(path, 'r') as handle:
        for line_number, line in enumerate(handle, start=1):
            stripped = line.strip()
            if not stripped:
                continue
            fields = stripped.split('\t')
            if len(fields) != 2:
                raise ValueError(
                    f'{path}:{line_number}: expected 2 tab-separated fields, '
                    f'got {len(fields)}'
                )
            targets.add((fields[0], fields[1]))
    return targets


def contig_header_lines(names=CONTIG_NAMES):
    """Return one ``##contig=<ID=...>`` header line per contig name."""
    return [f'##contig=<ID={name}>' for name in names]


def matching_targets(gene, protein, targets):
    """Return the (gene, protein) targets contained in the given field values.

    A target matches when its gene is contained in ``gene`` and its protein
    is contained in ``protein`` (the ``in`` operator, as in the original
    script).
    """
    return {(g, p) for g, p in targets if g in gene and p in protein}


def extract_hotspots(targets, cosmic_vcf=COSMIC_VCF, output_vcf=OUTPUT_VCF):
    """Copy records matching any target to ``output_vcf``.

    Returns:
        The set of targets for which no record was found.
    """
    # Imported here so the pure helpers above stay importable (and testable)
    # on machines without pysam installed.
    import pysam

    found = set()
    with pysam.VariantFile(cosmic_vcf, 'r') as source:
        # Declare the contigs on the input header; the same header object is
        # then reused for the output file.
        for header_line in contig_header_lines():
            source.header.add_line(header_line)

        with pysam.VariantFile(output_vcf, 'w', header=source.header) as sink:
            for record in source:
                gene = record.info.get('GENE', '')
                protein = record.info.get('AA', '')
                hits = matching_targets(gene, protein, targets)
                if hits:
                    # Write once per record, even if several targets match,
                    # so the output contains no duplicated records.
                    sink.write(record)
                    found |= hits
    return targets - found


def main(argv=None):
    """Command-line entry point; ``argv`` defaults to ``sys.argv[1:]``."""
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit(f'usage: {sys.argv[0]} TARGETS.tsv')

    targets = load_targets(args[0])
    no_record = extract_hotspots(targets)
    # Message reads: "Targets for which no record was obtained:"
    print('没有获取到记录的有:', no_record)


if __name__ == '__main__':
    main()