Reputation: 986
Given a file with content like this:
{
"title": "Pilot",
"image": [
{
"resource": "http://images2.nokk.nocookie.net/__cb20110227141960/notr/images/8/8b/pilot.jpg",
"description": "not yet implemented"
}
],
"content": "<p>The pilot ...</p>"
},
{
"title": "Special Christmas (Part 1)",
"image": [
{
"resource": "http://images1.nat.nocookie.net/__cb20090519172121/obli/images/e/ed/SpecialChristmas.jpg",
"description": "not yet implemented"
}
],
"content": "<p>Last comment...</p>"
}
I have this script, which replaces every "resource" value such as this one:
"resource": "http://images1.nat.nocookie.net/__cb20090519172121/obli/images/e/ed/SpecialChristmas.jpg"
with a relative path like this: "../img/SpecialChristmas.jpg"
from StringIO import StringIO
import re
import urllib
# Input is read line by line; the rewritten lines go to a separate output file.
infile = open('test2.txt')
outfile = open('test3.txt', 'w')
# Matches a whole '"resource": "<url>"' entry. Group 1 captures the text after
# the final "/" up to the character in front of "jpg" (the "." before "jpg" is
# unescaped, so it matches any character).
pattern = r'"resource": ".+/(.+).jpg"'
# Replacement text: same key, with the value rebuilt as ../img/<group 1>.jpg.
replacement = '"resource": "../img/\g<1>.jpg"'
# A second, separately compiled pattern without the '"resource": "' prefix;
# it is only used for the check inside the loop.
prog = re.compile(".+/(.+).jpg")
for line in infile:
# re.match only succeeds if the pattern matches from the start of the line.
if prog.match(line):
print (line) #this prints nothing
# NOTE(review): indentation was lost in this paste; the substitution and the
# write presumably run for every line, not only for matching ones - confirm.
text = re.sub(pattern, replacement, line)
outfile.write(text)
infile.close()
# NOTE(review): no parentheses, so the close method is referenced but not called.
outfile.close
But I also want to print the value of every resource in the loop, like this:
"http://images1.nat.nocookie.net/__cb20090519172121/obli/images/e/ed/SpecialChristmas.jpg"
"http://images1.nat.nocookie.net/__cb20090519172121/obli/images/e/ed/SpecialChristmas.jpg"
What I'm doing doesn't work, so what would be the right way to print every resource value to the console?
Thanks in advance!
Upvotes: 1
Views: 125
Reputation: 1021
You can nest groups within groups, so you only need to modify your original regex pattern.
Numbered nested groups can get a bit confusing, so it is easier to use named groups, i.e. (?P<group_name>pattern)
import re
import urllib
# Rewrite every "resource" URL in test2.txt to a relative ../img/ path, write
# the result to test3.txt, and print each original URL to the console.

# Named groups: "path" is the complete original URL, "filename" is the part
# after the last "/" without the ".jpg" extension.
pattern = r'"resource": "(?P<path>.+/(?P<filename>.+)\.jpg)"'
# Raw string so that \g<filename> reaches the re module as a group reference
# instead of relying on Python leaving the unknown escape "\g" untouched.
replacement = r'"resource": "../img/\g<filename>.jpg"'
prog = re.compile(pattern)

# The with statement closes both files even if an error occurs; the original
# "outfile.close" (no parentheses) never actually called close().
with open('test2.txt') as infile, open('test3.txt', 'w') as outfile:
    for line in infile:
        # search() rather than match(): match() is anchored at the start of
        # the line, so an indented '"resource": ...' entry would not be found.
        match = prog.search(line)
        if match:
            print(match.group('path'))
        # Lines without a "resource" entry pass through sub() unchanged.
        outfile.write(prog.sub(replacement, line))
Upvotes: 1
Reputation: 77197
from json import dumps, loads
# The file contains comma-separated objects with no enclosing array, so the
# text is wrapped in brackets to turn it into parseable JSON.
with open('that_file') as source:
    raw_text = source.read()
    records = loads('[' + raw_text + ']')

for record in records:
    for image in record['image']:
        if 'resource' not in image:
            continue
        # Example only: a real script would probably apply a more
        # sophisticated check and compute the new value here.
        image['resource'] = 'http://example.org'

# Write back to the same file, removing the outer array brackets again so the
# output keeps the bracket-less layout of the input.
with open('that_file', 'w') as target:
    serialized = dumps(records, indent=4)
    target.write(serialized.strip('[]'))
Upvotes: 2
Reputation: 986
I ended up doing this:
from StringIO import StringIO
import re
import urllib
# Rewrite every "resource" URL in test2.txt to a relative ../img/ path, write
# the result to test4.txt, and print each original URL to the console.

# Group 1 is the complete original URL, group 2 the file name without ".jpg".
# The "." before "jpg" is escaped so that only a literal dot is accepted.
pattern = r'"resource": "(.+/(.+)\.jpg)"'
# Raw string so that \g<2> reaches the re module as a group reference.
replacement = r'"resource": "../img/\g<2>.jpg"'
prog = re.compile(pattern)

# The with statement closes both files even if an error occurs; the original
# "outfile.close" (no parentheses) never actually called close().
with open('test2.txt') as infile, open('test4.txt', 'w') as outfile:
    for line in infile:
        found = prog.search(line)
        if found:
            # Take the URL from the capture group instead of slicing the line
            # by fixed offsets, which only worked when the line ended in
            # exactly '",' plus a newline.
            print(found.group(1))
        # Lines without a "resource" entry pass through sub() unchanged.
        outfile.write(prog.sub(replacement, line))
It works, but I don't think it looks Pythonic at all.
Upvotes: 0