This commit is contained in:
Dustin Loring 2026-03-06 11:22:44 -05:00 committed by GitHub
commit f9fc48ca88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -0,0 +1,24 @@
import json
import os
os.chdir(r'C:\tmp')
input_file = 'identity_conversations.jsonl'
output_file = 'identity_conversations_sharegpt.jsonl'
count = 0
with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'w', encoding='utf-8') as f_out:
for line in f_in:
messages = json.loads(line.strip())
converted = {'conversations': []}
for msg in messages:
role = 'human' if msg['role'] == 'user' else 'gpt'
converted['conversations'].append({
'from': role,
'value': msg['content']
})
f_out.write(json.dumps(converted, ensure_ascii=False) + '\n')
count += 1
print(f'Converted {count} conversations to ShareGPT format')
print(f'Output saved to: {output_file}')