Module diffpy.mpdf.simpleparser
Parser that can be used for CIF and MCIF files.
Expand source code
#!/usr/bin/env python
##############################################################################
#
# diffpy.mpdf by Frandsen Group
# Benjamin A. Frandsen benfrandsen@byu.edu
# (c) 2022 Benjamin Allen Frandsen
# All rights reserved
#
# File coded by: Victor Velasco
#
# See AUTHORS.txt for a list of people who contributed.
# See LICENSE.txt for license information.
#
##############################################################################
"""Parser that can be used for CIF and MCIF files."""
import re
class SimpleParser:
    """Extract information from a text file, typically CIF or MCIF.

    This class reads in a given text file (typically a CIF or MCIF file) and
    pulls out relevant information in the form of a Python dictionary.

    Every tag (a token starting with ``_``) becomes a key of ``file_dict``.
    The value stored under a key is always a list, with one entry appended
    per occurrence of the tag.  Entries that can be converted with
    ``float()`` are stored as floats; everything else is kept as a string.
    """

    def __init__(self, file_dict):
        """Store the dictionary that parsed entries are written into.

        Args:
            file_dict: Dictionary that receives the parsed entries.  It is
                kept by reference and modified in place.
        """
        self.file_dict = file_dict

    def ReadFile(self, filename):
        """Parse ``filename`` and return the populated dictionary.

        Args:
            filename: Path of the text file to read.

        Returns:
            ``self.file_dict`` after every line has been handed to
            :meth:`parse`.
        """
        # The context manager closes the handle even when parse() raises.
        with open(filename, "r") as file:
            # readline() is used instead of iterating the handle because
            # parse() pulls additional lines from the same handle while it
            # is inside a loop_ section.
            for raw_line in iter(file.readline, ""):
                tokens = raw_line.split()
                if tokens:
                    self.parse(tokens, file)
        return self.file_dict

    def parse(self, line, file):
        """Interpret one tokenised line, reading on through a loop section.

        A line whose first token starts with ``_`` is stored as a single
        tag/value item.  A line whose first token is ``loop_`` starts a
        loop: following lines that start with ``_`` are column tags, and
        the remaining lines are data rows.  Any other line is ignored.

        A loop ends at a blank line, at end of file, at a ``loop_`` line,
        or at a line starting with ``_`` that appears after the first data
        row.  In the last two cases the terminating line is parsed as well,
        so it is not lost.

        Args:
            line: Non-empty list of whitespace-separated tokens.
            file: Open handle positioned just after ``line``; must provide
                ``readline()``.
        """
        head = line[0]
        if head.startswith("_"):
            self.insert_dictionary(line)
            return
        if head != "loop_":
            return

        keys = []
        values = {}
        terminator = None
        while True:
            row = file.readline().strip()
            if not row:
                # Blank line or end of file closes the loop.
                break
            is_tag = row.startswith("_")
            if row.split()[0] == "loop_" or (is_tag and values):
                # This line belongs to whatever follows the loop; remember
                # it so it can be dispatched after the loop is stored.
                terminator = row.split()
                break
            if is_tag:
                # The whole stripped line is used as the column key.
                keys.append(row)
                continue
            pieces = row.split(None, 1)
            if "[" in row and len(pieces) == 2:
                # "label [a b c]" -> ["label", "a,b,c"].  Splitting on any
                # whitespace run keeps the brackets strippable when the
                # label is followed by several spaces or a tab.
                inner = pieces[1].strip("[]")
                values[len(values)] = [pieces[0], ",".join(inner.split())]
            else:
                values[len(values)] = row.split()
        self.insert_loop(keys, values)
        if terminator is not None:
            self.parse(terminator, file)

    def insert_loop(self, keys, values):
        """Append every column of every loop row to ``file_dict``.

        Args:
            keys: Column tags, in column order.
            values: Dictionary mapping a row counter to the list of string
                fields of that row.

        Raises:
            IndexError: If a row has fewer fields than there are keys.
        """
        for row in values.values():
            for column, key in enumerate(keys):
                # Convert first so that a short row raises before an empty
                # list is created for the key.
                entry = self.to_numeric(row[column])
                self.file_dict.setdefault(key, []).append(entry)

    def insert_dictionary(self, line):
        """Append the value of a single ``_tag value ...`` line.

        Args:
            line: Token list whose first element is the tag; the remaining
                tokens form the value.
        """
        entry = self.to_numeric(self.to_string(line))
        self.file_dict.setdefault(line[0], []).append(entry)

    def to_string(self, line):
        """Join the value tokens of ``line`` into one string.

        If the first value token starts with a double or single quote, that
        quote character is stripped from both ends of the first and of the
        last value token.  ``line`` itself is left unmodified.

        Args:
            line: Token list; element 0 is the tag and is skipped.

        Returns:
            The value tokens joined by single spaces, or ``""`` when there
            are none.
        """
        words = list(line[1:])  # copy so the caller's list is not altered
        if not words:
            return ""
        for quote in ('"', "'"):
            if words[0].startswith(quote):
                words[0] = words[0].strip(quote)
                words[-1] = words[-1].strip(quote)
                break
        return " ".join(words).strip()

    def to_numeric(self, x):
        """Convert ``x`` to a float when it represents a number.

        A trailing parenthesised group of digits, as in ``"5.43(2)"``, is
        dropped before conversion.  The group is only dropped when what
        remains is numeric, so text such as ``"Fe(2)"`` is returned intact.

        Args:
            x: String to convert.

        Returns:
            A float if ``x`` is numeric, otherwise ``x`` unchanged.
        """
        suffix = re.search(r'\(\d+\)$', x)
        if suffix:
            stem = x[:suffix.start()]
            if self.check_float(stem):
                return float(stem)
        if self.check_float(x):
            return float(x)
        return x

    def check_float(self, num):
        """Return True if ``float(num)`` succeeds, False on ValueError."""
        try:
            float(num)
        except ValueError:
            return False
        return True
Classes
class SimpleParser (file_dict)
-
Extract information from a text file, typically CIF or MCIF.
This class reads in a given text file (typically a CIF or MCIF file) and pulls out relevant information in the form of a Python dictionary.
Expand source code
class SimpleParser:
    """Extract information from a text file, typically CIF or MCIF.

    This class reads in a given text file (typically a CIF or MCIF file) and
    pulls out relevant information in the form of a Python dictionary.

    Every tag (a token starting with ``_``) becomes a key of ``file_dict``.
    The value stored under a key is always a list, with one entry appended
    per occurrence of the tag.  Entries that can be converted with
    ``float()`` are stored as floats; everything else is kept as a string.
    """

    def __init__(self, file_dict):
        """Store the dictionary that parsed entries are written into.

        Args:
            file_dict: Dictionary that receives the parsed entries.  It is
                kept by reference and modified in place.
        """
        self.file_dict = file_dict

    def ReadFile(self, filename):
        """Parse ``filename`` and return the populated dictionary.

        Args:
            filename: Path of the text file to read.

        Returns:
            ``self.file_dict`` after every line has been handed to
            :meth:`parse`.
        """
        # The context manager closes the handle even when parse() raises.
        with open(filename, "r") as file:
            # readline() is used instead of iterating the handle because
            # parse() pulls additional lines from the same handle while it
            # is inside a loop_ section.
            for raw_line in iter(file.readline, ""):
                tokens = raw_line.split()
                if tokens:
                    self.parse(tokens, file)
        return self.file_dict

    def parse(self, line, file):
        """Interpret one tokenised line, reading on through a loop section.

        A line whose first token starts with ``_`` is stored as a single
        tag/value item.  A line whose first token is ``loop_`` starts a
        loop: following lines that start with ``_`` are column tags, and
        the remaining lines are data rows.  Any other line is ignored.

        A loop ends at a blank line, at end of file, at a ``loop_`` line,
        or at a line starting with ``_`` that appears after the first data
        row.  In the last two cases the terminating line is parsed as well,
        so it is not lost.

        Args:
            line: Non-empty list of whitespace-separated tokens.
            file: Open handle positioned just after ``line``; must provide
                ``readline()``.
        """
        head = line[0]
        if head.startswith("_"):
            self.insert_dictionary(line)
            return
        if head != "loop_":
            return

        keys = []
        values = {}
        terminator = None
        while True:
            row = file.readline().strip()
            if not row:
                # Blank line or end of file closes the loop.
                break
            is_tag = row.startswith("_")
            if row.split()[0] == "loop_" or (is_tag and values):
                # This line belongs to whatever follows the loop; remember
                # it so it can be dispatched after the loop is stored.
                terminator = row.split()
                break
            if is_tag:
                # The whole stripped line is used as the column key.
                keys.append(row)
                continue
            pieces = row.split(None, 1)
            if "[" in row and len(pieces) == 2:
                # "label [a b c]" -> ["label", "a,b,c"].  Splitting on any
                # whitespace run keeps the brackets strippable when the
                # label is followed by several spaces or a tab.
                inner = pieces[1].strip("[]")
                values[len(values)] = [pieces[0], ",".join(inner.split())]
            else:
                values[len(values)] = row.split()
        self.insert_loop(keys, values)
        if terminator is not None:
            self.parse(terminator, file)

    def insert_loop(self, keys, values):
        """Append every column of every loop row to ``file_dict``.

        Args:
            keys: Column tags, in column order.
            values: Dictionary mapping a row counter to the list of string
                fields of that row.

        Raises:
            IndexError: If a row has fewer fields than there are keys.
        """
        for row in values.values():
            for column, key in enumerate(keys):
                # Convert first so that a short row raises before an empty
                # list is created for the key.
                entry = self.to_numeric(row[column])
                self.file_dict.setdefault(key, []).append(entry)

    def insert_dictionary(self, line):
        """Append the value of a single ``_tag value ...`` line.

        Args:
            line: Token list whose first element is the tag; the remaining
                tokens form the value.
        """
        entry = self.to_numeric(self.to_string(line))
        self.file_dict.setdefault(line[0], []).append(entry)

    def to_string(self, line):
        """Join the value tokens of ``line`` into one string.

        If the first value token starts with a double or single quote, that
        quote character is stripped from both ends of the first and of the
        last value token.  ``line`` itself is left unmodified.

        Args:
            line: Token list; element 0 is the tag and is skipped.

        Returns:
            The value tokens joined by single spaces, or ``""`` when there
            are none.
        """
        words = list(line[1:])  # copy so the caller's list is not altered
        if not words:
            return ""
        for quote in ('"', "'"):
            if words[0].startswith(quote):
                words[0] = words[0].strip(quote)
                words[-1] = words[-1].strip(quote)
                break
        return " ".join(words).strip()

    def to_numeric(self, x):
        """Convert ``x`` to a float when it represents a number.

        A trailing parenthesised group of digits, as in ``"5.43(2)"``, is
        dropped before conversion.  The group is only dropped when what
        remains is numeric, so text such as ``"Fe(2)"`` is returned intact.

        Args:
            x: String to convert.

        Returns:
            A float if ``x`` is numeric, otherwise ``x`` unchanged.
        """
        suffix = re.search(r'\(\d+\)$', x)
        if suffix:
            stem = x[:suffix.start()]
            if self.check_float(stem):
                return float(stem)
        if self.check_float(x):
            return float(x)
        return x

    def check_float(self, num):
        """Return True if ``float(num)`` succeeds, False on ValueError."""
        try:
            float(num)
        except ValueError:
            return False
        return True
Methods
def ReadFile(self, filename)
-
Expand source code
def ReadFile(self, filename):
    """Parse ``filename`` and return the populated dictionary.

    Args:
        filename: Path of the text file to read.

    Returns:
        ``self.file_dict`` after every non-blank line has been tokenised
        and handed to ``self.parse``.
    """
    # The context manager closes the handle even when parse() raises.
    with open(filename, "r") as file:
        # readline() is used instead of iterating the handle because
        # parse() receives the same handle and may read further lines
        # from it.
        for raw_line in iter(file.readline, ""):
            tokens = raw_line.split()
            if tokens:
                self.parse(tokens, file)
    return self.file_dict
def check_float(self, num)
-
Expand source code
def check_float(self, num):
    """Report whether ``num`` can be converted with ``float()``.

    Args:
        num: Value to test, normally a string.

    Returns:
        True if ``float(num)`` succeeds, False if it raises ValueError.
    """
    try:
        float(num)
    except ValueError:
        return False
    return True
def insert_dictionary(self, line)
-
Expand source code
def insert_dictionary(self, line):
    """Append the value of a single ``_tag value ...`` line to ``file_dict``.

    The value tokens are joined with ``self.to_string`` and converted with
    ``self.to_numeric``.  The result is appended to the list stored under
    the tag; the list is created on first use.

    Args:
        line: Token list whose first element is the tag.
    """
    entry = self.to_numeric(self.to_string(line))
    self.file_dict.setdefault(line[0], []).append(entry)
def insert_loop(self, keys, values)
-
Expand source code
def insert_loop(self, keys, values):
    """Append every column of every loop row to ``file_dict``.

    Args:
        keys: Column tags, in column order.
        values: Dictionary mapping a row counter to the list of string
            fields of that row.

    Raises:
        IndexError: If a row has fewer fields than there are keys.
    """
    for row in values.values():
        for column, key in enumerate(keys):
            # Convert first so that a short row raises before an empty
            # list is created for the key.
            entry = self.to_numeric(row[column])
            self.file_dict.setdefault(key, []).append(entry)
def parse(self, line, file)
-
Expand source code
def parse(self, line, file):
    """Interpret one tokenised line, reading on through a loop section.

    A line whose first token starts with ``_`` is stored as a single
    tag/value item.  A line whose first token is ``loop_`` starts a loop:
    following lines that start with ``_`` are column tags, and the
    remaining lines are data rows.  Any other line is ignored.

    A loop ends at a blank line, at end of file, at a ``loop_`` line, or at
    a line starting with ``_`` that appears after the first data row.  In
    the last two cases the terminating line is parsed as well, so it is not
    lost.

    Args:
        line: Non-empty list of whitespace-separated tokens.
        file: Open handle positioned just after ``line``; must provide
            ``readline()``.
    """
    head = line[0]
    if head.startswith("_"):
        self.insert_dictionary(line)
        return
    if head != "loop_":
        return

    keys = []
    values = {}
    terminator = None
    while True:
        row = file.readline().strip()
        if not row:
            # Blank line or end of file closes the loop.
            break
        is_tag = row.startswith("_")
        if row.split()[0] == "loop_" or (is_tag and values):
            # This line belongs to whatever follows the loop; remember it
            # so it can be dispatched after the loop is stored.
            terminator = row.split()
            break
        if is_tag:
            # The whole stripped line is used as the column key.
            keys.append(row)
            continue
        pieces = row.split(None, 1)
        if "[" in row and len(pieces) == 2:
            # "label [a b c]" -> ["label", "a,b,c"].  Splitting on any
            # whitespace run keeps the brackets strippable when the label
            # is followed by several spaces or a tab.
            inner = pieces[1].strip("[]")
            values[len(values)] = [pieces[0], ",".join(inner.split())]
        else:
            values[len(values)] = row.split()
    self.insert_loop(keys, values)
    if terminator is not None:
        self.parse(terminator, file)
def to_numeric(self, x)
-
Expand source code
def to_numeric(self, x):
    """Convert ``x`` to a float when it represents a number.

    A trailing parenthesised group of digits, as in ``"5.43(2)"``, is
    dropped before conversion.  The group is only dropped when what remains
    is numeric, so text such as ``"Fe(2)"`` is returned intact.

    Args:
        x: String to convert.

    Returns:
        A float if ``x`` is numeric according to ``self.check_float``,
        otherwise ``x`` unchanged.
    """
    suffix = re.search(r'\(\d+\)$', x)
    if suffix:
        stem = x[:suffix.start()]
        if self.check_float(stem):
            return float(stem)
    if self.check_float(x):
        return float(x)
    return x
def to_string(self, line)
-
Expand source code
def to_string(self, line):
    """Join the value tokens of ``line`` into one string.

    If the first value token starts with a double or single quote, that
    quote character is stripped from both ends of the first and of the last
    value token.  ``line`` itself is left unmodified.

    Args:
        line: Token list; element 0 is the tag and is skipped.

    Returns:
        The value tokens joined by single spaces, or ``""`` when there are
        none.
    """
    words = list(line[1:])  # copy so the caller's list is not altered
    if not words:
        return ""
    for quote in ('"', "'"):
        if words[0].startswith(quote):
            words[0] = words[0].strip(quote)
            words[-1] = words[-1].strip(quote)
            break
    return " ".join(words).strip()