Reputation: 13
let me start off by saying, its possible I am attempting to use a terrible data structure.
Im trying to get information out of a large text dump and cant seem to get it sorted right. Data looks like the below, but is much longer.
r1 r01
2020 77.7
2020 76.0
2020 77.7
r2 r02
2020 74.7
2020 74.0
2020 76.7
r2 r03
2020 74.2
2020 74.1
2020 76.8
r1 r04
2020 74.6
2020 75.6
2020 75.8
I thought I could end up getting it into a data structure like..
r1_list = [
r01: [77.7,76.0,76.0,76.0],
r04: [69.5,4,4,5],
]
r2_list = [
r02: [1,2,3,4],
r04: [3,4,4,5],
]
Then I could loop through the lists, and check the mean etc of the values per device.
Here is what ive been trying
import re
r1_list = []
r2_list = []
current_device = False
device_type = False
current_reading = False
def matchr1(line):
matchThis = ""
matched = re.match(r'^(r1)\s(r\d+)$',line)
if matched:
#Matches r1
matchThis = matched.group(2)
else:
return False
return matchThis
def matchr2(line):
matchThis = ""
matched = re.match(r'^(r2)\s(r\d+)$',line)
if matched:
#Matches r2
matchThis = matched.group(2)
else:
return False
return matchThis
def matchReading(line):
matchThis = ""
matched = re.match(r'^(\d+)\s(\d+.\d+)$',line)
if matched:
#Matches r2
matchThis = matched.group(2)
else:
return False
return matchThis
with open("data.txt") as f:
for line in f:
if matchr1(line):
current_device = matchr1(line)
device_type = "r1"
if matchr2(line):
current_device = matchr2(line)
device_type = "r2"
if matchReading(line):
current_reading = matchReading(line)
if current_reading:
if device_type == "r1":
temp_dict = {current_device: [current_reading]}
r1_list.append(temp_dict)
if device_type == "r2":
temp_dict = {current_device: [current_reading]}
r2_list.append(temp_dict)
current_reading = False
print(r1_list)
print(r2_list)
What I get
[{'r01': ['77.7']}, {'r01': ['76.0']}, {'r01': ['77.7']}, {'r04': ['74.6']}, {'r04': ['75.6']}, {'r04': ['75.8']}]
[{'r02': ['74.7']}, {'r02': ['74.0']}, {'r02': ['76.7']}, {'r03': ['74.2']}, {'r03': ['74.1']}, {'r03': ['76.8']}]
Upvotes: 1
Views: 66
Reputation: 30957
There are two separate steps here:
Here's what I came up with:
#!/usr/bin/env python
data = """r1 r01
2020 77.7
2020 76.0
2020 77.7
r2 r02
2020 74.7
2020 74.0
2020 76.7
r2 r03
2020 74.2
2020 74.1
2020 76.8
r1 r04
2020 74.6
2020 75.6
2020 75.8"""
result = {}
for line in data.splitlines():
if line.startswith("r"):
# Find (or create) the place in the data structure where
# we should insert the values.
first, second = line.split()
# dict.setdefault(key, value) sets `dict[key] = value` if
# it's not already set, then returns `dict[key]`.
dest = result.setdefault(first, {}).setdefault(second, [])
# Move on to the next line.
continue
# Get the value of the line
_, value = line.split()
# Add it to the list we found in the `line.startswith('r')`
# bit above.
dest.append(value)
assert result == {
"r1": {
"r01": ["77.7", "76.0", "77.7"],
"r04": ["74.6", "75.6", "75.8"]
},
"r2": {
"r02": ["74.7", "74.0", "76.7"],
"r03": ["74.2", "74.1", "76.8"]
},
}
Upvotes: 1